File: | llvm/lib/Transforms/Scalar/LICM.cpp |
Warning: | line 1191, column 41 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // This pass performs loop invariant code motion, attempting to remove as much | ||||||||
10 | // code from the body of a loop as possible. It does this by either hoisting | ||||||||
11 | // code into the preheader block, or by sinking code to the exit blocks if it is | ||||||||
12 | // safe. This pass also promotes must-aliased memory locations in the loop to | ||||||||
13 | // live in registers, thus hoisting and sinking "invariant" loads and stores. | ||||||||
14 | // | ||||||||
15 | // This pass uses alias analysis for two purposes: | ||||||||
16 | // | ||||||||
17 | // 1. Moving loop invariant loads and calls out of loops. If we can determine | ||||||||
18 | // that a load or call inside of a loop never aliases anything stored to, | ||||||||
19 | // we can hoist it or sink it like any other instruction. | ||||||||
20 | // 2. Scalar Promotion of Memory - If there is a store instruction inside of | ||||||||
21 | // the loop, we try to move the store to happen AFTER the loop instead of | ||||||||
22 | // inside of the loop. This can only happen if a few conditions are true: | ||||||||
23 | // A. The pointer stored through is loop invariant | ||||||||
24 | // B. There are no stores or loads in the loop which _may_ alias the | ||||||||
25 | // pointer. There are no calls in the loop which mod/ref the pointer. | ||||||||
26 | // If these conditions are true, we can promote the loads and stores in the | ||||||||
27 | // loop of the pointer to use a temporary alloca'd variable. We then use | ||||||||
28 | // the SSAUpdater to construct the appropriate SSA form for the value. | ||||||||
29 | // | ||||||||
30 | //===----------------------------------------------------------------------===// | ||||||||
31 | |||||||||
32 | #include "llvm/Transforms/Scalar/LICM.h" | ||||||||
33 | #include "llvm/ADT/SetOperations.h" | ||||||||
34 | #include "llvm/ADT/Statistic.h" | ||||||||
35 | #include "llvm/Analysis/AliasAnalysis.h" | ||||||||
36 | #include "llvm/Analysis/AliasSetTracker.h" | ||||||||
37 | #include "llvm/Analysis/BasicAliasAnalysis.h" | ||||||||
38 | #include "llvm/Analysis/BlockFrequencyInfo.h" | ||||||||
39 | #include "llvm/Analysis/CaptureTracking.h" | ||||||||
40 | #include "llvm/Analysis/ConstantFolding.h" | ||||||||
41 | #include "llvm/Analysis/GlobalsModRef.h" | ||||||||
42 | #include "llvm/Analysis/GuardUtils.h" | ||||||||
43 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" | ||||||||
44 | #include "llvm/Analysis/Loads.h" | ||||||||
45 | #include "llvm/Analysis/LoopInfo.h" | ||||||||
46 | #include "llvm/Analysis/LoopIterator.h" | ||||||||
47 | #include "llvm/Analysis/LoopPass.h" | ||||||||
48 | #include "llvm/Analysis/MemoryBuiltins.h" | ||||||||
49 | #include "llvm/Analysis/MemorySSA.h" | ||||||||
50 | #include "llvm/Analysis/MemorySSAUpdater.h" | ||||||||
51 | #include "llvm/Analysis/MustExecute.h" | ||||||||
52 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||||||||
53 | #include "llvm/Analysis/ScalarEvolution.h" | ||||||||
54 | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" | ||||||||
55 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||||||||
56 | #include "llvm/Analysis/ValueTracking.h" | ||||||||
57 | #include "llvm/IR/CFG.h" | ||||||||
58 | #include "llvm/IR/Constants.h" | ||||||||
59 | #include "llvm/IR/DataLayout.h" | ||||||||
60 | #include "llvm/IR/DebugInfoMetadata.h" | ||||||||
61 | #include "llvm/IR/DerivedTypes.h" | ||||||||
62 | #include "llvm/IR/Dominators.h" | ||||||||
63 | #include "llvm/IR/Instructions.h" | ||||||||
64 | #include "llvm/IR/IntrinsicInst.h" | ||||||||
65 | #include "llvm/IR/LLVMContext.h" | ||||||||
66 | #include "llvm/IR/Metadata.h" | ||||||||
67 | #include "llvm/IR/PatternMatch.h" | ||||||||
68 | #include "llvm/IR/PredIteratorCache.h" | ||||||||
69 | #include "llvm/InitializePasses.h" | ||||||||
70 | #include "llvm/Support/CommandLine.h" | ||||||||
71 | #include "llvm/Support/Debug.h" | ||||||||
72 | #include "llvm/Support/raw_ostream.h" | ||||||||
73 | #include "llvm/Transforms/Scalar.h" | ||||||||
74 | #include "llvm/Transforms/Scalar/LoopPassManager.h" | ||||||||
75 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" | ||||||||
76 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||||||
77 | #include "llvm/Transforms/Utils/Local.h" | ||||||||
78 | #include "llvm/Transforms/Utils/LoopUtils.h" | ||||||||
79 | #include "llvm/Transforms/Utils/SSAUpdater.h" | ||||||||
80 | #include <algorithm> | ||||||||
81 | #include <utility> | ||||||||
82 | using namespace llvm; | ||||||||
83 | |||||||||
84 | #define DEBUG_TYPE"licm" "licm" | ||||||||
85 | |||||||||
86 | STATISTIC(NumCreatedBlocks, "Number of blocks created")static llvm::Statistic NumCreatedBlocks = {"licm", "NumCreatedBlocks" , "Number of blocks created"}; | ||||||||
87 | STATISTIC(NumClonedBranches, "Number of branches cloned")static llvm::Statistic NumClonedBranches = {"licm", "NumClonedBranches" , "Number of branches cloned"}; | ||||||||
88 | STATISTIC(NumSunk, "Number of instructions sunk out of loop")static llvm::Statistic NumSunk = {"licm", "NumSunk", "Number of instructions sunk out of loop" }; | ||||||||
89 | STATISTIC(NumHoisted, "Number of instructions hoisted out of loop")static llvm::Statistic NumHoisted = {"licm", "NumHoisted", "Number of instructions hoisted out of loop" }; | ||||||||
90 | STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk")static llvm::Statistic NumMovedLoads = {"licm", "NumMovedLoads" , "Number of load insts hoisted or sunk"}; | ||||||||
91 | STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk")static llvm::Statistic NumMovedCalls = {"licm", "NumMovedCalls" , "Number of call insts hoisted or sunk"}; | ||||||||
92 | STATISTIC(NumPromoted, "Number of memory locations promoted to registers")static llvm::Statistic NumPromoted = {"licm", "NumPromoted", "Number of memory locations promoted to registers" }; | ||||||||
93 | |||||||||
94 | /// Memory promotion is enabled by default. | ||||||||
95 | static cl::opt<bool> | ||||||||
96 | DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), | ||||||||
97 | cl::desc("Disable memory promotion in LICM pass")); | ||||||||
98 | |||||||||
99 | static cl::opt<bool> ControlFlowHoisting( | ||||||||
100 | "licm-control-flow-hoisting", cl::Hidden, cl::init(false), | ||||||||
101 | cl::desc("Enable control flow (and PHI) hoisting in LICM")); | ||||||||
102 | |||||||||
103 | static cl::opt<unsigned> HoistSinkColdnessThreshold( | ||||||||
104 | "licm-coldness-threshold", cl::Hidden, cl::init(4), | ||||||||
105 | cl::desc("Relative coldness Threshold of hoisting/sinking destination " | ||||||||
106 | "block for LICM to be considered beneficial")); | ||||||||
107 | |||||||||
108 | static cl::opt<uint32_t> MaxNumUsesTraversed( | ||||||||
109 | "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), | ||||||||
110 | cl::desc("Max num uses visited for identifying load " | ||||||||
111 | "invariance in loop using invariant start (default = 8)")); | ||||||||
112 | |||||||||
113 | // Default value of zero implies we use the regular alias set tracker mechanism | ||||||||
114 | // instead of the cross product using AA to identify aliasing of the memory | ||||||||
115 | // location we are interested in. | ||||||||
116 | static cl::opt<int> | ||||||||
117 | LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), | ||||||||
118 | cl::desc("How many instruction to cross product using AA")); | ||||||||
119 | |||||||||
120 | // Experimental option to allow imprecision in LICM in pathological cases, in | ||||||||
121 | // exchange for faster compile. This is to be removed if MemorySSA starts to | ||||||||
122 | // address the same issue. This flag applies only when LICM uses MemorySSA | ||||||||
123 | // instead on AliasSetTracker. LICM calls MemorySSAWalker's | ||||||||
124 | // getClobberingMemoryAccess, up to the value of the Cap, getting perfect | ||||||||
125 | // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess, | ||||||||
126 | // which may not be precise, since optimizeUses is capped. The result is | ||||||||
127 | // correct, but we may not get as "far up" as possible to get which access is | ||||||||
128 | // clobbering the one queried. | ||||||||
129 | cl::opt<unsigned> llvm::SetLicmMssaOptCap( | ||||||||
130 | "licm-mssa-optimization-cap", cl::init(100), cl::Hidden, | ||||||||
131 | cl::desc("Enable imprecision in LICM in pathological cases, in exchange " | ||||||||
132 | "for faster compile. Caps the MemorySSA clobbering calls.")); | ||||||||
133 | |||||||||
134 | // Experimentally, memory promotion carries less importance than sinking and | ||||||||
135 | // hoisting. Limit when we do promotion when using MemorySSA, in order to save | ||||||||
136 | // compile time. | ||||||||
137 | cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap( | ||||||||
138 | "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden, | ||||||||
139 | cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " | ||||||||
140 | "effect. When MSSA in LICM is enabled, then this is the maximum " | ||||||||
141 | "number of accesses allowed to be present in a loop in order to " | ||||||||
142 | "enable memory promotion.")); | ||||||||
143 | |||||||||
144 | static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); | ||||||||
145 | static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, | ||||||||
146 | const LoopSafetyInfo *SafetyInfo, | ||||||||
147 | TargetTransformInfo *TTI, bool &FreeInLoop); | ||||||||
148 | static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, | ||||||||
149 | BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, | ||||||||
150 | MemorySSAUpdater *MSSAU, ScalarEvolution *SE, | ||||||||
151 | OptimizationRemarkEmitter *ORE); | ||||||||
152 | static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, | ||||||||
153 | BlockFrequencyInfo *BFI, const Loop *CurLoop, | ||||||||
154 | ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, | ||||||||
155 | OptimizationRemarkEmitter *ORE); | ||||||||
156 | static bool isSafeToExecuteUnconditionally(Instruction &Inst, | ||||||||
157 | const DominatorTree *DT, | ||||||||
158 | const Loop *CurLoop, | ||||||||
159 | const LoopSafetyInfo *SafetyInfo, | ||||||||
160 | OptimizationRemarkEmitter *ORE, | ||||||||
161 | const Instruction *CtxI = nullptr); | ||||||||
162 | static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, | ||||||||
163 | AliasSetTracker *CurAST, Loop *CurLoop, | ||||||||
164 | AAResults *AA); | ||||||||
165 | static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, | ||||||||
166 | Loop *CurLoop, | ||||||||
167 | SinkAndHoistLICMFlags &Flags); | ||||||||
168 | static Instruction *cloneInstructionInExitBlock( | ||||||||
169 | Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, | ||||||||
170 | const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); | ||||||||
171 | |||||||||
172 | static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, | ||||||||
173 | AliasSetTracker *AST, MemorySSAUpdater *MSSAU); | ||||||||
174 | |||||||||
175 | static void moveInstructionBefore(Instruction &I, Instruction &Dest, | ||||||||
176 | ICFLoopSafetyInfo &SafetyInfo, | ||||||||
177 | MemorySSAUpdater *MSSAU, ScalarEvolution *SE); | ||||||||
178 | |||||||||
179 | namespace { | ||||||||
180 | struct LoopInvariantCodeMotion { | ||||||||
181 | bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, | ||||||||
182 | BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, | ||||||||
183 | TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, | ||||||||
184 | OptimizationRemarkEmitter *ORE); | ||||||||
185 | |||||||||
186 | LoopInvariantCodeMotion(unsigned LicmMssaOptCap, | ||||||||
187 | unsigned LicmMssaNoAccForPromotionCap) | ||||||||
188 | : LicmMssaOptCap(LicmMssaOptCap), | ||||||||
189 | LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} | ||||||||
190 | |||||||||
191 | private: | ||||||||
192 | unsigned LicmMssaOptCap; | ||||||||
193 | unsigned LicmMssaNoAccForPromotionCap; | ||||||||
194 | |||||||||
195 | std::unique_ptr<AliasSetTracker> | ||||||||
196 | collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA); | ||||||||
197 | std::unique_ptr<AliasSetTracker> | ||||||||
198 | collectAliasInfoForLoopWithMSSA(Loop *L, AAResults *AA, | ||||||||
199 | MemorySSAUpdater *MSSAU); | ||||||||
200 | }; | ||||||||
201 | |||||||||
202 | struct LegacyLICMPass : public LoopPass { | ||||||||
203 | static char ID; // Pass identification, replacement for typeid | ||||||||
204 | LegacyLICMPass( | ||||||||
205 | unsigned LicmMssaOptCap = SetLicmMssaOptCap, | ||||||||
206 | unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) | ||||||||
207 | : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { | ||||||||
208 | initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); | ||||||||
209 | } | ||||||||
210 | |||||||||
211 | bool runOnLoop(Loop *L, LPPassManager &LPM) override { | ||||||||
212 | if (skipLoop(L)) | ||||||||
213 | return false; | ||||||||
214 | |||||||||
215 | auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); | ||||||||
216 | MemorySSA *MSSA = EnableMSSALoopDependency | ||||||||
217 | ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA()) | ||||||||
218 | : nullptr; | ||||||||
219 | bool hasProfileData = L->getHeader()->getParent()->hasProfileData(); | ||||||||
220 | BlockFrequencyInfo *BFI = | ||||||||
221 | hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() | ||||||||
222 | : nullptr; | ||||||||
223 | // For the old PM, we can't use OptimizationRemarkEmitter as an analysis | ||||||||
224 | // pass. Function analyses need to be preserved across loop transformations | ||||||||
225 | // but ORE cannot be preserved (see comment before the pass definition). | ||||||||
226 | OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); | ||||||||
227 | return LICM.runOnLoop( | ||||||||
228 | L, &getAnalysis<AAResultsWrapperPass>().getAAResults(), | ||||||||
229 | &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), | ||||||||
230 | &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI, | ||||||||
231 | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( | ||||||||
232 | *L->getHeader()->getParent()), | ||||||||
233 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( | ||||||||
234 | *L->getHeader()->getParent()), | ||||||||
235 | SE ? &SE->getSE() : nullptr, MSSA, &ORE); | ||||||||
236 | } | ||||||||
237 | |||||||||
238 | /// This transformation requires natural loop information & requires that | ||||||||
239 | /// loop preheaders be inserted into the CFG... | ||||||||
240 | /// | ||||||||
241 | void getAnalysisUsage(AnalysisUsage &AU) const override { | ||||||||
242 | AU.addPreserved<DominatorTreeWrapperPass>(); | ||||||||
243 | AU.addPreserved<LoopInfoWrapperPass>(); | ||||||||
244 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | ||||||||
245 | if (EnableMSSALoopDependency) { | ||||||||
246 | AU.addRequired<MemorySSAWrapperPass>(); | ||||||||
247 | AU.addPreserved<MemorySSAWrapperPass>(); | ||||||||
248 | } | ||||||||
249 | AU.addRequired<TargetTransformInfoWrapperPass>(); | ||||||||
250 | getLoopAnalysisUsage(AU); | ||||||||
251 | LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); | ||||||||
252 | AU.addPreserved<LazyBlockFrequencyInfoPass>(); | ||||||||
253 | AU.addPreserved<LazyBranchProbabilityInfoPass>(); | ||||||||
254 | } | ||||||||
255 | |||||||||
256 | private: | ||||||||
257 | LoopInvariantCodeMotion LICM; | ||||||||
258 | }; | ||||||||
259 | } // namespace | ||||||||
260 | |||||||||
261 | PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, | ||||||||
262 | LoopStandardAnalysisResults &AR, LPMUpdater &) { | ||||||||
263 | // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis | ||||||||
264 | // pass. Function analyses need to be preserved across loop transformations | ||||||||
265 | // but ORE cannot be preserved (see comment before the pass definition). | ||||||||
266 | OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); | ||||||||
267 | |||||||||
268 | LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); | ||||||||
269 | if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, | ||||||||
270 | &AR.SE, AR.MSSA, &ORE)) | ||||||||
271 | return PreservedAnalyses::all(); | ||||||||
272 | |||||||||
273 | auto PA = getLoopPassPreservedAnalyses(); | ||||||||
274 | |||||||||
275 | PA.preserve<DominatorTreeAnalysis>(); | ||||||||
276 | PA.preserve<LoopAnalysis>(); | ||||||||
277 | if (AR.MSSA) | ||||||||
278 | PA.preserve<MemorySSAAnalysis>(); | ||||||||
279 | |||||||||
280 | return PA; | ||||||||
281 | } | ||||||||
282 | |||||||||
283 | char LegacyLICMPass::ID = 0; | ||||||||
284 | INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",static void *initializeLegacyLICMPassPassOnce(PassRegistry & Registry) { | ||||||||
285 | false, false)static void *initializeLegacyLICMPassPassOnce(PassRegistry & Registry) { | ||||||||
286 | INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry); | ||||||||
287 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | ||||||||
288 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | ||||||||
289 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry); | ||||||||
290 | INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)initializeLazyBFIPassPass(Registry); | ||||||||
291 | INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,PassInfo *PI = new PassInfo( "Loop Invariant Code Motion", "licm" , &LegacyLICMPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <LegacyLICMPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeLegacyLICMPassPassFlag ; void llvm::initializeLegacyLICMPassPass(PassRegistry &Registry ) { llvm::call_once(InitializeLegacyLICMPassPassFlag, initializeLegacyLICMPassPassOnce , std::ref(Registry)); } | ||||||||
292 | false)PassInfo *PI = new PassInfo( "Loop Invariant Code Motion", "licm" , &LegacyLICMPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <LegacyLICMPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeLegacyLICMPassPassFlag ; void llvm::initializeLegacyLICMPassPass(PassRegistry &Registry ) { llvm::call_once(InitializeLegacyLICMPassPassFlag, initializeLegacyLICMPassPassOnce , std::ref(Registry)); } | ||||||||
293 | |||||||||
294 | Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } | ||||||||
295 | Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, | ||||||||
296 | unsigned LicmMssaNoAccForPromotionCap) { | ||||||||
297 | return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); | ||||||||
298 | } | ||||||||
299 | |||||||||
300 | /// Hoist expressions out of the specified loop. Note, alias info for inner | ||||||||
301 | /// loop is not preserved so it is not a good idea to run LICM multiple | ||||||||
302 | /// times on one loop. | ||||||||
303 | bool LoopInvariantCodeMotion::runOnLoop( | ||||||||
304 | Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, | ||||||||
305 | BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, | ||||||||
306 | ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) { | ||||||||
307 | bool Changed = false; | ||||||||
308 | |||||||||
309 | assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.")((L->isLCSSAForm(*DT) && "Loop is not in LCSSA form." ) ? static_cast<void> (0) : __assert_fail ("L->isLCSSAForm(*DT) && \"Loop is not in LCSSA form.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 309, __PRETTY_FUNCTION__)); | ||||||||
310 | |||||||||
311 | // If this loop has metadata indicating that LICM is not to be performed then | ||||||||
312 | // just exit. | ||||||||
313 | if (hasDisableLICMTransformsHint(L)) { | ||||||||
314 | return false; | ||||||||
315 | } | ||||||||
316 | |||||||||
317 | std::unique_ptr<AliasSetTracker> CurAST; | ||||||||
318 | std::unique_ptr<MemorySSAUpdater> MSSAU; | ||||||||
319 | bool NoOfMemAccTooLarge = false; | ||||||||
320 | unsigned LicmMssaOptCounter = 0; | ||||||||
321 | |||||||||
322 | if (!MSSA) { | ||||||||
323 | LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Using Alias Set Tracker.\n" ; } } while (false); | ||||||||
324 | CurAST = collectAliasInfoForLoop(L, LI, AA); | ||||||||
325 | } else { | ||||||||
326 | LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Using MemorySSA.\n"; } } while (false); | ||||||||
327 | MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); | ||||||||
328 | |||||||||
329 | unsigned AccessCapCount = 0; | ||||||||
330 | for (auto *BB : L->getBlocks()) { | ||||||||
331 | if (auto *Accesses = MSSA->getBlockAccesses(BB)) { | ||||||||
332 | for (const auto &MA : *Accesses) { | ||||||||
333 | (void)MA; | ||||||||
334 | AccessCapCount++; | ||||||||
335 | if (AccessCapCount > LicmMssaNoAccForPromotionCap) { | ||||||||
336 | NoOfMemAccTooLarge = true; | ||||||||
337 | break; | ||||||||
338 | } | ||||||||
339 | } | ||||||||
340 | } | ||||||||
341 | if (NoOfMemAccTooLarge) | ||||||||
342 | break; | ||||||||
343 | } | ||||||||
344 | } | ||||||||
345 | |||||||||
346 | // Get the preheader block to move instructions into... | ||||||||
347 | BasicBlock *Preheader = L->getLoopPreheader(); | ||||||||
348 | |||||||||
349 | // Compute loop safety information. | ||||||||
350 | ICFLoopSafetyInfo SafetyInfo; | ||||||||
351 | SafetyInfo.computeLoopSafetyInfo(L); | ||||||||
352 | |||||||||
353 | // We want to visit all of the instructions in this loop... that are not parts | ||||||||
354 | // of our subloops (they have already had their invariants hoisted out of | ||||||||
355 | // their loop, into this loop, so there is no need to process the BODIES of | ||||||||
356 | // the subloops). | ||||||||
357 | // | ||||||||
358 | // Traverse the body of the loop in depth first order on the dominator tree so | ||||||||
359 | // that we are guaranteed to see definitions before we see uses. This allows | ||||||||
360 | // us to sink instructions in one pass, without iteration. After sinking | ||||||||
361 | // instructions, we perform another pass to hoist them out of the loop. | ||||||||
362 | SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter, | ||||||||
363 | LicmMssaOptCap, LicmMssaNoAccForPromotionCap, | ||||||||
364 | /*IsSink=*/true}; | ||||||||
365 | if (L->hasDedicatedExits()) | ||||||||
366 | Changed |= | ||||||||
367 | sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, | ||||||||
368 | CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); | ||||||||
369 | Flags.IsSink = false; | ||||||||
370 | if (Preheader) | ||||||||
371 | Changed |= | ||||||||
372 | hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, | ||||||||
373 | CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE); | ||||||||
374 | |||||||||
375 | // Now that all loop invariants have been removed from the loop, promote any | ||||||||
376 | // memory references to scalars that we can. | ||||||||
377 | // Don't sink stores from loops without dedicated block exits. Exits | ||||||||
378 | // containing indirect branches are not transformed by loop simplify, | ||||||||
379 | // make sure we catch that. An additional load may be generated in the | ||||||||
380 | // preheader for SSA updater, so also avoid sinking when no preheader | ||||||||
381 | // is available. | ||||||||
382 | if (!DisablePromotion && Preheader && L->hasDedicatedExits() && | ||||||||
383 | !NoOfMemAccTooLarge) { | ||||||||
384 | // Figure out the loop exits and their insertion points | ||||||||
385 | SmallVector<BasicBlock *, 8> ExitBlocks; | ||||||||
386 | L->getUniqueExitBlocks(ExitBlocks); | ||||||||
387 | |||||||||
388 | // We can't insert into a catchswitch. | ||||||||
389 | bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { | ||||||||
390 | return isa<CatchSwitchInst>(Exit->getTerminator()); | ||||||||
391 | }); | ||||||||
392 | |||||||||
393 | if (!HasCatchSwitch) { | ||||||||
394 | SmallVector<Instruction *, 8> InsertPts; | ||||||||
395 | SmallVector<MemoryAccess *, 8> MSSAInsertPts; | ||||||||
396 | InsertPts.reserve(ExitBlocks.size()); | ||||||||
397 | if (MSSAU) | ||||||||
398 | MSSAInsertPts.reserve(ExitBlocks.size()); | ||||||||
399 | for (BasicBlock *ExitBlock : ExitBlocks) { | ||||||||
400 | InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); | ||||||||
401 | if (MSSAU) | ||||||||
402 | MSSAInsertPts.push_back(nullptr); | ||||||||
403 | } | ||||||||
404 | |||||||||
405 | PredIteratorCache PIC; | ||||||||
406 | |||||||||
407 | bool Promoted = false; | ||||||||
408 | |||||||||
409 | // Build an AST using MSSA. | ||||||||
410 | if (!CurAST.get()) | ||||||||
411 | CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get()); | ||||||||
412 | |||||||||
413 | // Loop over all of the alias sets in the tracker object. | ||||||||
414 | for (AliasSet &AS : *CurAST) { | ||||||||
415 | // We can promote this alias set if it has a store, if it is a "Must" | ||||||||
416 | // alias set, if the pointer is loop invariant, and if we are not | ||||||||
417 | // eliminating any volatile loads or stores. | ||||||||
418 | if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || | ||||||||
419 | !L->isLoopInvariant(AS.begin()->getValue())) | ||||||||
420 | continue; | ||||||||
421 | |||||||||
422 | assert(((!AS.empty() && "Must alias set should have at least one pointer element in it!" ) ? static_cast<void> (0) : __assert_fail ("!AS.empty() && \"Must alias set should have at least one pointer element in it!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 424, __PRETTY_FUNCTION__)) | ||||||||
423 | !AS.empty() &&((!AS.empty() && "Must alias set should have at least one pointer element in it!" ) ? static_cast<void> (0) : __assert_fail ("!AS.empty() && \"Must alias set should have at least one pointer element in it!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 424, __PRETTY_FUNCTION__)) | ||||||||
424 | "Must alias set should have at least one pointer element in it!")((!AS.empty() && "Must alias set should have at least one pointer element in it!" ) ? static_cast<void> (0) : __assert_fail ("!AS.empty() && \"Must alias set should have at least one pointer element in it!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 424, __PRETTY_FUNCTION__)); | ||||||||
425 | |||||||||
426 | SmallSetVector<Value *, 8> PointerMustAliases; | ||||||||
427 | for (const auto &ASI : AS) | ||||||||
428 | PointerMustAliases.insert(ASI.getValue()); | ||||||||
429 | |||||||||
430 | Promoted |= promoteLoopAccessesToScalars( | ||||||||
431 | PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, | ||||||||
432 | DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); | ||||||||
433 | } | ||||||||
434 | |||||||||
435 | // Once we have promoted values across the loop body we have to | ||||||||
436 | // recursively reform LCSSA as any nested loop may now have values defined | ||||||||
437 | // within the loop used in the outer loop. | ||||||||
438 | // FIXME: This is really heavy handed. It would be a bit better to use an | ||||||||
439 | // SSAUpdater strategy during promotion that was LCSSA aware and reformed | ||||||||
440 | // it as it went. | ||||||||
441 | if (Promoted) | ||||||||
442 | formLCSSARecursively(*L, *DT, LI, SE); | ||||||||
443 | |||||||||
444 | Changed |= Promoted; | ||||||||
445 | } | ||||||||
446 | } | ||||||||
447 | |||||||||
448 | // Check that neither this loop nor its parent have had LCSSA broken. LICM is | ||||||||
449 | // specifically moving instructions across the loop boundary and so it is | ||||||||
450 | // especially in need of sanity checking here. | ||||||||
451 | assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!")((L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!" ) ? static_cast<void> (0) : __assert_fail ("L->isLCSSAForm(*DT) && \"Loop not left in LCSSA form after LICM!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 451, __PRETTY_FUNCTION__)); | ||||||||
452 | assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&(((L->isOutermost() || L->getParentLoop()->isLCSSAForm (*DT)) && "Parent loop not left in LCSSA form after LICM!" ) ? static_cast<void> (0) : __assert_fail ("(L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && \"Parent loop not left in LCSSA form after LICM!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 453, __PRETTY_FUNCTION__)) | ||||||||
453 | "Parent loop not left in LCSSA form after LICM!")(((L->isOutermost() || L->getParentLoop()->isLCSSAForm (*DT)) && "Parent loop not left in LCSSA form after LICM!" ) ? static_cast<void> (0) : __assert_fail ("(L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && \"Parent loop not left in LCSSA form after LICM!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 453, __PRETTY_FUNCTION__)); | ||||||||
454 | |||||||||
455 | if (MSSAU.get() && VerifyMemorySSA) | ||||||||
456 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||||
457 | |||||||||
458 | if (Changed && SE) | ||||||||
459 | SE->forgetLoopDispositions(L); | ||||||||
460 | return Changed; | ||||||||
461 | } | ||||||||
462 | |||||||||
463 | /// Walk the specified region of the CFG (defined by all blocks dominated by | ||||||||
464 | /// the specified block, and that are in the current loop) in reverse depth | ||||||||
465 | /// first order w.r.t the DominatorTree. This allows us to visit uses before | ||||||||
466 | /// definitions, allowing us to sink a loop body in one pass without iteration. | ||||||||
467 | /// | ||||||||
468 | bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, | ||||||||
469 | DominatorTree *DT, BlockFrequencyInfo *BFI, | ||||||||
470 | TargetLibraryInfo *TLI, TargetTransformInfo *TTI, | ||||||||
471 | Loop *CurLoop, AliasSetTracker *CurAST, | ||||||||
472 | MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, | ||||||||
473 | SinkAndHoistLICMFlags &Flags, | ||||||||
474 | OptimizationRemarkEmitter *ORE) { | ||||||||
475 | |||||||||
476 | // Verify inputs. | ||||||||
477 | assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to sinkRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 479, __PRETTY_FUNCTION__)) | ||||||||
478 | CurLoop != nullptr && SafetyInfo != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to sinkRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 479, __PRETTY_FUNCTION__)) | ||||||||
479 | "Unexpected input to sinkRegion.")((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to sinkRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 479, __PRETTY_FUNCTION__)); | ||||||||
480 | assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 481, __PRETTY_FUNCTION__)) | ||||||||
481 | "Either AliasSetTracker or MemorySSA should be initialized.")((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 481, __PRETTY_FUNCTION__)); | ||||||||
482 | |||||||||
483 | // We want to visit children before parents. We will enque all the parents | ||||||||
484 | // before their children in the worklist and process the worklist in reverse | ||||||||
485 | // order. | ||||||||
486 | SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); | ||||||||
487 | |||||||||
488 | bool Changed = false; | ||||||||
489 | for (DomTreeNode *DTN : reverse(Worklist)) { | ||||||||
490 | BasicBlock *BB = DTN->getBlock(); | ||||||||
491 | // Only need to process the contents of this block if it is not part of a | ||||||||
492 | // subloop (which would already have been processed). | ||||||||
493 | if (inSubLoop(BB, CurLoop, LI)) | ||||||||
494 | continue; | ||||||||
495 | |||||||||
496 | for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { | ||||||||
497 | Instruction &I = *--II; | ||||||||
498 | |||||||||
499 | // If the instruction is dead, we would try to sink it because it isn't | ||||||||
500 | // used in the loop, instead, just delete it. | ||||||||
501 | if (isInstructionTriviallyDead(&I, TLI)) { | ||||||||
502 | LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM deleting dead inst: " << I << '\n'; } } while (false); | ||||||||
503 | salvageKnowledge(&I); | ||||||||
504 | salvageDebugInfo(I); | ||||||||
505 | ++II; | ||||||||
506 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||||
507 | Changed = true; | ||||||||
508 | continue; | ||||||||
509 | } | ||||||||
510 | |||||||||
511 | // Check to see if we can sink this instruction to the exit blocks | ||||||||
512 | // of the loop. We can do this if the all users of the instruction are | ||||||||
513 | // outside of the loop. In this case, it doesn't even matter if the | ||||||||
514 | // operands of the instruction are loop invariant. | ||||||||
515 | // | ||||||||
516 | bool FreeInLoop = false; | ||||||||
517 | if (!I.mayHaveSideEffects() && | ||||||||
518 | isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && | ||||||||
519 | canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, | ||||||||
520 | ORE)) { | ||||||||
521 | if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { | ||||||||
522 | if (!FreeInLoop) { | ||||||||
523 | ++II; | ||||||||
524 | salvageDebugInfo(I); | ||||||||
525 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||||
526 | } | ||||||||
527 | Changed = true; | ||||||||
528 | } | ||||||||
529 | } | ||||||||
530 | } | ||||||||
531 | } | ||||||||
532 | if (MSSAU && VerifyMemorySSA) | ||||||||
533 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||||
534 | return Changed; | ||||||||
535 | } | ||||||||
536 | |||||||||
537 | namespace { | ||||||||
538 | // This is a helper class for hoistRegion to make it able to hoist control flow | ||||||||
539 | // in order to be able to hoist phis. The way this works is that we initially | ||||||||
540 | // start hoisting to the loop preheader, and when we see a loop invariant branch | ||||||||
541 | // we make note of this. When we then come to hoist an instruction that's | ||||||||
542 | // conditional on such a branch we duplicate the branch and the relevant control | ||||||||
543 | // flow, then hoist the instruction into the block corresponding to its original | ||||||||
544 | // block in the duplicated control flow. | ||||||||
545 | class ControlFlowHoister { | ||||||||
546 | private: | ||||||||
547 | // Information about the loop we are hoisting from | ||||||||
548 | LoopInfo *LI; | ||||||||
549 | DominatorTree *DT; | ||||||||
550 | Loop *CurLoop; | ||||||||
551 | MemorySSAUpdater *MSSAU; | ||||||||
552 | |||||||||
553 | // A map of blocks in the loop to the block their instructions will be hoisted | ||||||||
554 | // to. | ||||||||
555 | DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap; | ||||||||
556 | |||||||||
557 | // The branches that we can hoist, mapped to the block that marks a | ||||||||
558 | // convergence point of their control flow. | ||||||||
559 | DenseMap<BranchInst *, BasicBlock *> HoistableBranches; | ||||||||
560 | |||||||||
561 | public: | ||||||||
562 | ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop, | ||||||||
563 | MemorySSAUpdater *MSSAU) | ||||||||
564 | : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {} | ||||||||
565 | |||||||||
566 | void registerPossiblyHoistableBranch(BranchInst *BI) { | ||||||||
567 | // We can only hoist conditional branches with loop invariant operands. | ||||||||
568 | if (!ControlFlowHoisting || !BI->isConditional() || | ||||||||
569 | !CurLoop->hasLoopInvariantOperands(BI)) | ||||||||
570 | return; | ||||||||
571 | |||||||||
572 | // The branch destinations need to be in the loop, and we don't gain | ||||||||
573 | // anything by duplicating conditional branches with duplicate successors, | ||||||||
574 | // as it's essentially the same as an unconditional branch. | ||||||||
575 | BasicBlock *TrueDest = BI->getSuccessor(0); | ||||||||
576 | BasicBlock *FalseDest = BI->getSuccessor(1); | ||||||||
577 | if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) || | ||||||||
578 | TrueDest == FalseDest) | ||||||||
579 | return; | ||||||||
580 | |||||||||
581 | // We can hoist BI if one branch destination is the successor of the other, | ||||||||
582 | // or both have common successor which we check by seeing if the | ||||||||
583 | // intersection of their successors is non-empty. | ||||||||
584 | // TODO: This could be expanded to allowing branches where both ends | ||||||||
585 | // eventually converge to a single block. | ||||||||
586 | SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc; | ||||||||
587 | TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest)); | ||||||||
588 | FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest)); | ||||||||
589 | BasicBlock *CommonSucc = nullptr; | ||||||||
590 | if (TrueDestSucc.count(FalseDest)) { | ||||||||
591 | CommonSucc = FalseDest; | ||||||||
592 | } else if (FalseDestSucc.count(TrueDest)) { | ||||||||
593 | CommonSucc = TrueDest; | ||||||||
594 | } else { | ||||||||
595 | set_intersect(TrueDestSucc, FalseDestSucc); | ||||||||
596 | // If there's one common successor use that. | ||||||||
597 | if (TrueDestSucc.size() == 1) | ||||||||
598 | CommonSucc = *TrueDestSucc.begin(); | ||||||||
599 | // If there's more than one pick whichever appears first in the block list | ||||||||
600 | // (we can't use the value returned by TrueDestSucc.begin() as it's | ||||||||
601 | // unpredicatable which element gets returned). | ||||||||
602 | else if (!TrueDestSucc.empty()) { | ||||||||
603 | Function *F = TrueDest->getParent(); | ||||||||
604 | auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); }; | ||||||||
605 | auto It = std::find_if(F->begin(), F->end(), IsSucc); | ||||||||
606 | assert(It != F->end() && "Could not find successor in function")((It != F->end() && "Could not find successor in function" ) ? static_cast<void> (0) : __assert_fail ("It != F->end() && \"Could not find successor in function\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 606, __PRETTY_FUNCTION__)); | ||||||||
607 | CommonSucc = &*It; | ||||||||
608 | } | ||||||||
609 | } | ||||||||
610 | // The common successor has to be dominated by the branch, as otherwise | ||||||||
611 | // there will be some other path to the successor that will not be | ||||||||
612 | // controlled by this branch so any phi we hoist would be controlled by the | ||||||||
613 | // wrong condition. This also takes care of avoiding hoisting of loop back | ||||||||
614 | // edges. | ||||||||
615 | // TODO: In some cases this could be relaxed if the successor is dominated | ||||||||
616 | // by another block that's been hoisted and we can guarantee that the | ||||||||
617 | // control flow has been replicated exactly. | ||||||||
618 | if (CommonSucc && DT->dominates(BI, CommonSucc)) | ||||||||
619 | HoistableBranches[BI] = CommonSucc; | ||||||||
620 | } | ||||||||
621 | |||||||||
622 | bool canHoistPHI(PHINode *PN) { | ||||||||
623 | // The phi must have loop invariant operands. | ||||||||
624 | if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN)) | ||||||||
625 | return false; | ||||||||
626 | // We can hoist phis if the block they are in is the target of hoistable | ||||||||
627 | // branches which cover all of the predecessors of the block. | ||||||||
628 | SmallPtrSet<BasicBlock *, 8> PredecessorBlocks; | ||||||||
629 | BasicBlock *BB = PN->getParent(); | ||||||||
630 | for (BasicBlock *PredBB : predecessors(BB)) | ||||||||
631 | PredecessorBlocks.insert(PredBB); | ||||||||
632 | // If we have less predecessor blocks than predecessors then the phi will | ||||||||
633 | // have more than one incoming value for the same block which we can't | ||||||||
634 | // handle. | ||||||||
635 | // TODO: This could be handled be erasing some of the duplicate incoming | ||||||||
636 | // values. | ||||||||
637 | if (PredecessorBlocks.size() != pred_size(BB)) | ||||||||
638 | return false; | ||||||||
639 | for (auto &Pair : HoistableBranches) { | ||||||||
640 | if (Pair.second == BB) { | ||||||||
641 | // Which blocks are predecessors via this branch depends on if the | ||||||||
642 | // branch is triangle-like or diamond-like. | ||||||||
643 | if (Pair.first->getSuccessor(0) == BB) { | ||||||||
644 | PredecessorBlocks.erase(Pair.first->getParent()); | ||||||||
645 | PredecessorBlocks.erase(Pair.first->getSuccessor(1)); | ||||||||
646 | } else if (Pair.first->getSuccessor(1) == BB) { | ||||||||
647 | PredecessorBlocks.erase(Pair.first->getParent()); | ||||||||
648 | PredecessorBlocks.erase(Pair.first->getSuccessor(0)); | ||||||||
649 | } else { | ||||||||
650 | PredecessorBlocks.erase(Pair.first->getSuccessor(0)); | ||||||||
651 | PredecessorBlocks.erase(Pair.first->getSuccessor(1)); | ||||||||
652 | } | ||||||||
653 | } | ||||||||
654 | } | ||||||||
655 | // PredecessorBlocks will now be empty if for every predecessor of BB we | ||||||||
656 | // found a hoistable branch source. | ||||||||
657 | return PredecessorBlocks.empty(); | ||||||||
658 | } | ||||||||
659 | |||||||||
660 | BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) { | ||||||||
661 | if (!ControlFlowHoisting) | ||||||||
662 | return CurLoop->getLoopPreheader(); | ||||||||
663 | // If BB has already been hoisted, return that | ||||||||
664 | if (HoistDestinationMap.count(BB)) | ||||||||
665 | return HoistDestinationMap[BB]; | ||||||||
666 | |||||||||
667 | // Check if this block is conditional based on a pending branch | ||||||||
668 | auto HasBBAsSuccessor = | ||||||||
669 | [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) { | ||||||||
670 | return BB != Pair.second && (Pair.first->getSuccessor(0) == BB || | ||||||||
671 | Pair.first->getSuccessor(1) == BB); | ||||||||
672 | }; | ||||||||
673 | auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(), | ||||||||
674 | HasBBAsSuccessor); | ||||||||
675 | |||||||||
676 | // If not involved in a pending branch, hoist to preheader | ||||||||
677 | BasicBlock *InitialPreheader = CurLoop->getLoopPreheader(); | ||||||||
678 | if (It == HoistableBranches.end()) { | ||||||||
679 | LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM using " << InitialPreheader ->getName() << " as hoist destination for " << BB->getName() << "\n"; } } while (false) | ||||||||
680 | << " as hoist destination for " << BB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM using " << InitialPreheader ->getName() << " as hoist destination for " << BB->getName() << "\n"; } } while (false) | ||||||||
681 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM using " << InitialPreheader ->getName() << " as hoist destination for " << BB->getName() << "\n"; } } while (false); | ||||||||
682 | HoistDestinationMap[BB] = InitialPreheader; | ||||||||
683 | return InitialPreheader; | ||||||||
684 | } | ||||||||
685 | BranchInst *BI = It->first; | ||||||||
686 | assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==((std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor ) == HoistableBranches.end() && "BB is expected to be the target of at most one branch" ) ? static_cast<void> (0) : __assert_fail ("std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == HoistableBranches.end() && \"BB is expected to be the target of at most one branch\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 688, __PRETTY_FUNCTION__)) | ||||||||
687 | HoistableBranches.end() &&((std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor ) == HoistableBranches.end() && "BB is expected to be the target of at most one branch" ) ? static_cast<void> (0) : __assert_fail ("std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == HoistableBranches.end() && \"BB is expected to be the target of at most one branch\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 688, __PRETTY_FUNCTION__)) | ||||||||
688 | "BB is expected to be the target of at most one branch")((std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor ) == HoistableBranches.end() && "BB is expected to be the target of at most one branch" ) ? static_cast<void> (0) : __assert_fail ("std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == HoistableBranches.end() && \"BB is expected to be the target of at most one branch\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 688, __PRETTY_FUNCTION__)); | ||||||||
689 | |||||||||
690 | LLVMContext &C = BB->getContext(); | ||||||||
691 | BasicBlock *TrueDest = BI->getSuccessor(0); | ||||||||
692 | BasicBlock *FalseDest = BI->getSuccessor(1); | ||||||||
693 | BasicBlock *CommonSucc = HoistableBranches[BI]; | ||||||||
694 | BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent()); | ||||||||
695 | |||||||||
696 | // Create hoisted versions of blocks that currently don't have them | ||||||||
697 | auto CreateHoistedBlock = [&](BasicBlock *Orig) { | ||||||||
698 | if (HoistDestinationMap.count(Orig)) | ||||||||
699 | return HoistDestinationMap[Orig]; | ||||||||
700 | BasicBlock *New = | ||||||||
701 | BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent()); | ||||||||
702 | HoistDestinationMap[Orig] = New; | ||||||||
703 | DT->addNewBlock(New, HoistTarget); | ||||||||
704 | if (CurLoop->getParentLoop()) | ||||||||
705 | CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI); | ||||||||
706 | ++NumCreatedBlocks; | ||||||||
707 | LLVM_DEBUG(dbgs() << "LICM created " << New->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM created " << New-> getName() << " as hoist destination for " << Orig ->getName() << "\n"; } } while (false) | ||||||||
708 | << " as hoist destination for " << Orig->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM created " << New-> getName() << " as hoist destination for " << Orig ->getName() << "\n"; } } while (false) | ||||||||
709 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM created " << New-> getName() << " as hoist destination for " << Orig ->getName() << "\n"; } } while (false); | ||||||||
710 | return New; | ||||||||
711 | }; | ||||||||
712 | BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest); | ||||||||
713 | BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest); | ||||||||
714 | BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc); | ||||||||
715 | |||||||||
716 | // Link up these blocks with branches. | ||||||||
717 | if (!HoistCommonSucc->getTerminator()) { | ||||||||
718 | // The new common successor we've generated will branch to whatever that | ||||||||
719 | // hoist target branched to. | ||||||||
720 | BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor(); | ||||||||
721 | assert(TargetSucc && "Expected hoist target to have a single successor")((TargetSucc && "Expected hoist target to have a single successor" ) ? static_cast<void> (0) : __assert_fail ("TargetSucc && \"Expected hoist target to have a single successor\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 721, __PRETTY_FUNCTION__)); | ||||||||
722 | HoistCommonSucc->moveBefore(TargetSucc); | ||||||||
723 | BranchInst::Create(TargetSucc, HoistCommonSucc); | ||||||||
724 | } | ||||||||
725 | if (!HoistTrueDest->getTerminator()) { | ||||||||
726 | HoistTrueDest->moveBefore(HoistCommonSucc); | ||||||||
727 | BranchInst::Create(HoistCommonSucc, HoistTrueDest); | ||||||||
728 | } | ||||||||
729 | if (!HoistFalseDest->getTerminator()) { | ||||||||
730 | HoistFalseDest->moveBefore(HoistCommonSucc); | ||||||||
731 | BranchInst::Create(HoistCommonSucc, HoistFalseDest); | ||||||||
732 | } | ||||||||
733 | |||||||||
734 | // If BI is being cloned to what was originally the preheader then | ||||||||
735 | // HoistCommonSucc will now be the new preheader. | ||||||||
736 | if (HoistTarget == InitialPreheader) { | ||||||||
737 | // Phis in the loop header now need to use the new preheader. | ||||||||
738 | InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); | ||||||||
739 | if (MSSAU) | ||||||||
740 | MSSAU->wireOldPredecessorsToNewImmediatePredecessor( | ||||||||
741 | HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget}); | ||||||||
742 | // The new preheader dominates the loop header. | ||||||||
743 | DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); | ||||||||
744 | DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader()); | ||||||||
745 | DT->changeImmediateDominator(HeaderNode, PreheaderNode); | ||||||||
746 | // The preheader hoist destination is now the new preheader, with the | ||||||||
747 | // exception of the hoist destination of this branch. | ||||||||
748 | for (auto &Pair : HoistDestinationMap) | ||||||||
749 | if (Pair.second == InitialPreheader && Pair.first != BI->getParent()) | ||||||||
750 | Pair.second = HoistCommonSucc; | ||||||||
751 | } | ||||||||
752 | |||||||||
753 | // Now finally clone BI. | ||||||||
754 | ReplaceInstWithInst( | ||||||||
755 | HoistTarget->getTerminator(), | ||||||||
756 | BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition())); | ||||||||
757 | ++NumClonedBranches; | ||||||||
758 | |||||||||
759 | assert(CurLoop->getLoopPreheader() &&((CurLoop->getLoopPreheader() && "Hoisting blocks should not have destroyed preheader" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->getLoopPreheader() && \"Hoisting blocks should not have destroyed preheader\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 760, __PRETTY_FUNCTION__)) | ||||||||
760 | "Hoisting blocks should not have destroyed preheader")((CurLoop->getLoopPreheader() && "Hoisting blocks should not have destroyed preheader" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->getLoopPreheader() && \"Hoisting blocks should not have destroyed preheader\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 760, __PRETTY_FUNCTION__)); | ||||||||
761 | return HoistDestinationMap[BB]; | ||||||||
762 | } | ||||||||
763 | }; | ||||||||
764 | } // namespace | ||||||||
765 | |||||||||
766 | // Hoisting/sinking instruction out of a loop isn't always beneficial. It's only | ||||||||
767 | // only worthwhile if the destination block is actually colder than current | ||||||||
768 | // block. | ||||||||
769 | static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock, | ||||||||
770 | OptimizationRemarkEmitter *ORE, | ||||||||
771 | BlockFrequencyInfo *BFI) { | ||||||||
772 | // Check block frequency only when runtime profile is available | ||||||||
773 | // to avoid pathological cases. With static profile, lean towards | ||||||||
774 | // hosting because it helps canonicalize the loop for vectorizer. | ||||||||
775 | if (!DstBlock->getParent()->hasProfileData()) | ||||||||
776 | return true; | ||||||||
777 | |||||||||
778 | if (!HoistSinkColdnessThreshold || !BFI) | ||||||||
779 | return true; | ||||||||
780 | |||||||||
781 | BasicBlock *SrcBlock = I.getParent(); | ||||||||
782 | if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold > | ||||||||
783 | BFI->getBlockFreq(SrcBlock).getFrequency()) { | ||||||||
784 | ORE->emit([&]() { | ||||||||
785 | return OptimizationRemarkMissed(DEBUG_TYPE"licm", "SinkHoistInst", &I) | ||||||||
786 | << "failed to sink or hoist instruction because containing block " | ||||||||
787 | "has lower frequency than destination block"; | ||||||||
788 | }); | ||||||||
789 | return false; | ||||||||
790 | } | ||||||||
791 | |||||||||
792 | return true; | ||||||||
793 | } | ||||||||
794 | |||||||||
795 | /// Walk the specified region of the CFG (defined by all blocks dominated by | ||||||||
796 | /// the specified block, and that are in the current loop) in depth first | ||||||||
797 | /// order w.r.t the DominatorTree. This allows us to visit definitions before | ||||||||
798 | /// uses, allowing us to hoist a loop body in one pass without iteration. | ||||||||
799 | /// | ||||||||
800 | bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, | ||||||||
801 | DominatorTree *DT, BlockFrequencyInfo *BFI, | ||||||||
802 | TargetLibraryInfo *TLI, Loop *CurLoop, | ||||||||
803 | AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, | ||||||||
804 | ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, | ||||||||
805 | SinkAndHoistLICMFlags &Flags, | ||||||||
806 | OptimizationRemarkEmitter *ORE) { | ||||||||
807 | // Verify inputs. | ||||||||
808 | assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to hoistRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 810, __PRETTY_FUNCTION__)) | ||||||||
809 | CurLoop != nullptr && SafetyInfo != nullptr &&((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to hoistRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 810, __PRETTY_FUNCTION__)) | ||||||||
810 | "Unexpected input to hoistRegion.")((N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion." ) ? static_cast<void> (0) : __assert_fail ("N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected input to hoistRegion.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 810, __PRETTY_FUNCTION__)); | ||||||||
811 | assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 812, __PRETTY_FUNCTION__)) | ||||||||
812 | "Either AliasSetTracker or MemorySSA should be initialized.")((((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized." ) ? static_cast<void> (0) : __assert_fail ("((CurAST != nullptr) ^ (MSSAU != nullptr)) && \"Either AliasSetTracker or MemorySSA should be initialized.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 812, __PRETTY_FUNCTION__)); | ||||||||
813 | |||||||||
814 | ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU); | ||||||||
815 | |||||||||
816 | // Keep track of instructions that have been hoisted, as they may need to be | ||||||||
817 | // re-hoisted if they end up not dominating all of their uses. | ||||||||
818 | SmallVector<Instruction *, 16> HoistedInstructions; | ||||||||
819 | |||||||||
820 | // For PHI hoisting to work we need to hoist blocks before their successors. | ||||||||
821 | // We can do this by iterating through the blocks in the loop in reverse | ||||||||
822 | // post-order. | ||||||||
823 | LoopBlocksRPO Worklist(CurLoop); | ||||||||
824 | Worklist.perform(LI); | ||||||||
825 | bool Changed = false; | ||||||||
826 | for (BasicBlock *BB : Worklist) { | ||||||||
827 | // Only need to process the contents of this block if it is not part of a | ||||||||
828 | // subloop (which would already have been processed). | ||||||||
829 | if (inSubLoop(BB, CurLoop, LI)) | ||||||||
830 | continue; | ||||||||
831 | |||||||||
832 | for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { | ||||||||
833 | Instruction &I = *II++; | ||||||||
834 | // Try constant folding this instruction. If all the operands are | ||||||||
835 | // constants, it is technically hoistable, but it would be better to | ||||||||
836 | // just fold it. | ||||||||
837 | if (Constant *C = ConstantFoldInstruction( | ||||||||
838 | &I, I.getModule()->getDataLayout(), TLI)) { | ||||||||
839 | LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *Cdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'; } } while (false) | ||||||||
840 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'; } } while (false); | ||||||||
841 | if (CurAST) | ||||||||
842 | CurAST->copyValue(&I, C); | ||||||||
843 | // FIXME MSSA: Such replacements may make accesses unoptimized (D51960). | ||||||||
844 | I.replaceAllUsesWith(C); | ||||||||
845 | if (isInstructionTriviallyDead(&I, TLI)) | ||||||||
846 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||||
847 | Changed = true; | ||||||||
848 | continue; | ||||||||
849 | } | ||||||||
850 | |||||||||
851 | // Try hoisting the instruction out to the preheader. We can only do | ||||||||
852 | // this if all of the operands of the instruction are loop invariant and | ||||||||
853 | // if it is safe to hoist the instruction. We also check block frequency | ||||||||
854 | // to make sure instruction only gets hoisted into colder blocks. | ||||||||
855 | // TODO: It may be safe to hoist if we are hoisting to a conditional block | ||||||||
856 | // and we have accurately duplicated the control flow from the loop header | ||||||||
857 | // to that block. | ||||||||
858 | if (CurLoop->hasLoopInvariantOperands(&I) && | ||||||||
859 | canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, | ||||||||
860 | ORE) && | ||||||||
861 | worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) && | ||||||||
862 | isSafeToExecuteUnconditionally( | ||||||||
863 | I, DT, CurLoop, SafetyInfo, ORE, | ||||||||
864 | CurLoop->getLoopPreheader()->getTerminator())) { | ||||||||
865 | hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, | ||||||||
866 | MSSAU, SE, ORE); | ||||||||
867 | HoistedInstructions.push_back(&I); | ||||||||
868 | Changed = true; | ||||||||
869 | continue; | ||||||||
870 | } | ||||||||
871 | |||||||||
872 | // Attempt to remove floating point division out of the loop by | ||||||||
873 | // converting it to a reciprocal multiplication. | ||||||||
874 | if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() && | ||||||||
875 | CurLoop->isLoopInvariant(I.getOperand(1))) { | ||||||||
876 | auto Divisor = I.getOperand(1); | ||||||||
877 | auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); | ||||||||
878 | auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); | ||||||||
879 | ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); | ||||||||
880 | SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent()); | ||||||||
881 | ReciprocalDivisor->insertBefore(&I); | ||||||||
882 | |||||||||
883 | auto Product = | ||||||||
884 | BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); | ||||||||
885 | Product->setFastMathFlags(I.getFastMathFlags()); | ||||||||
886 | SafetyInfo->insertInstructionTo(Product, I.getParent()); | ||||||||
887 | Product->insertAfter(&I); | ||||||||
888 | I.replaceAllUsesWith(Product); | ||||||||
889 | eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); | ||||||||
890 | |||||||||
891 | hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), | ||||||||
892 | SafetyInfo, MSSAU, SE, ORE); | ||||||||
893 | HoistedInstructions.push_back(ReciprocalDivisor); | ||||||||
894 | Changed = true; | ||||||||
895 | continue; | ||||||||
896 | } | ||||||||
897 | |||||||||
898 | auto IsInvariantStart = [&](Instruction &I) { | ||||||||
899 | using namespace PatternMatch; | ||||||||
900 | return I.use_empty() && | ||||||||
901 | match(&I, m_Intrinsic<Intrinsic::invariant_start>()); | ||||||||
902 | }; | ||||||||
903 | auto MustExecuteWithoutWritesBefore = [&](Instruction &I) { | ||||||||
904 | return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && | ||||||||
905 | SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop); | ||||||||
906 | }; | ||||||||
907 | if ((IsInvariantStart(I) || isGuard(&I)) && | ||||||||
908 | CurLoop->hasLoopInvariantOperands(&I) && | ||||||||
909 | MustExecuteWithoutWritesBefore(I)) { | ||||||||
910 | hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, | ||||||||
911 | MSSAU, SE, ORE); | ||||||||
912 | HoistedInstructions.push_back(&I); | ||||||||
913 | Changed = true; | ||||||||
914 | continue; | ||||||||
915 | } | ||||||||
916 | |||||||||
917 | if (PHINode *PN = dyn_cast<PHINode>(&I)) { | ||||||||
918 | if (CFH.canHoistPHI(PN)) { | ||||||||
919 | // Redirect incoming blocks first to ensure that we create hoisted | ||||||||
920 | // versions of those blocks before we hoist the phi. | ||||||||
921 | for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i) | ||||||||
922 | PN->setIncomingBlock( | ||||||||
923 | i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); | ||||||||
924 | hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, | ||||||||
925 | MSSAU, SE, ORE); | ||||||||
926 | assert(DT->dominates(PN, BB) && "Conditional PHIs not expected")((DT->dominates(PN, BB) && "Conditional PHIs not expected" ) ? static_cast<void> (0) : __assert_fail ("DT->dominates(PN, BB) && \"Conditional PHIs not expected\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 926, __PRETTY_FUNCTION__)); | ||||||||
927 | Changed = true; | ||||||||
928 | continue; | ||||||||
929 | } | ||||||||
930 | } | ||||||||
931 | |||||||||
932 | // Remember possibly hoistable branches so we can actually hoist them | ||||||||
933 | // later if needed. | ||||||||
934 | if (BranchInst *BI = dyn_cast<BranchInst>(&I)) | ||||||||
935 | CFH.registerPossiblyHoistableBranch(BI); | ||||||||
936 | } | ||||||||
937 | } | ||||||||
938 | |||||||||
939 | // If we hoisted instructions to a conditional block they may not dominate | ||||||||
940 | // their uses that weren't hoisted (such as phis where some operands are not | ||||||||
941 | // loop invariant). If so make them unconditional by moving them to their | ||||||||
942 | // immediate dominator. We iterate through the instructions in reverse order | ||||||||
943 | // which ensures that when we rehoist an instruction we rehoist its operands, | ||||||||
944 | // and also keep track of where in the block we are rehoisting to to make sure | ||||||||
945 | // that we rehoist instructions before the instructions that use them. | ||||||||
946 | Instruction *HoistPoint = nullptr; | ||||||||
947 | if (ControlFlowHoisting) { | ||||||||
948 | for (Instruction *I : reverse(HoistedInstructions)) { | ||||||||
949 | if (!llvm::all_of(I->uses(), | ||||||||
950 | [&](Use &U) { return DT->dominates(I, U); })) { | ||||||||
951 | BasicBlock *Dominator = | ||||||||
952 | DT->getNode(I->getParent())->getIDom()->getBlock(); | ||||||||
953 | if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) { | ||||||||
954 | if (HoistPoint) | ||||||||
955 | assert(DT->dominates(Dominator, HoistPoint->getParent()) &&((DT->dominates(Dominator, HoistPoint->getParent()) && "New hoist point expected to dominate old hoist point") ? static_cast <void> (0) : __assert_fail ("DT->dominates(Dominator, HoistPoint->getParent()) && \"New hoist point expected to dominate old hoist point\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 956, __PRETTY_FUNCTION__)) | ||||||||
956 | "New hoist point expected to dominate old hoist point")((DT->dominates(Dominator, HoistPoint->getParent()) && "New hoist point expected to dominate old hoist point") ? static_cast <void> (0) : __assert_fail ("DT->dominates(Dominator, HoistPoint->getParent()) && \"New hoist point expected to dominate old hoist point\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 956, __PRETTY_FUNCTION__)); | ||||||||
957 | HoistPoint = Dominator->getTerminator(); | ||||||||
958 | } | ||||||||
959 | LLVM_DEBUG(dbgs() << "LICM rehoisting to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM rehoisting to " << HoistPoint ->getParent()->getName() << ": " << *I << "\n"; } } while (false) | ||||||||
960 | << HoistPoint->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM rehoisting to " << HoistPoint ->getParent()->getName() << ": " << *I << "\n"; } } while (false) | ||||||||
961 | << ": " << *I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM rehoisting to " << HoistPoint ->getParent()->getName() << ": " << *I << "\n"; } } while (false); | ||||||||
962 | moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE); | ||||||||
963 | HoistPoint = I; | ||||||||
964 | Changed = true; | ||||||||
965 | } | ||||||||
966 | } | ||||||||
967 | } | ||||||||
968 | if (MSSAU && VerifyMemorySSA) | ||||||||
969 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||||
970 | |||||||||
971 | // Now that we've finished hoisting make sure that LI and DT are still | ||||||||
972 | // valid. | ||||||||
973 | #ifdef EXPENSIVE_CHECKS | ||||||||
974 | if (Changed) { | ||||||||
975 | assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&((DT->verify(DominatorTree::VerificationLevel::Fast) && "Dominator tree verification failed") ? static_cast<void> (0) : __assert_fail ("DT->verify(DominatorTree::VerificationLevel::Fast) && \"Dominator tree verification failed\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 976, __PRETTY_FUNCTION__)) | ||||||||
976 | "Dominator tree verification failed")((DT->verify(DominatorTree::VerificationLevel::Fast) && "Dominator tree verification failed") ? static_cast<void> (0) : __assert_fail ("DT->verify(DominatorTree::VerificationLevel::Fast) && \"Dominator tree verification failed\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 976, __PRETTY_FUNCTION__)); | ||||||||
977 | LI->verify(*DT); | ||||||||
978 | } | ||||||||
979 | #endif | ||||||||
980 | |||||||||
981 | return Changed; | ||||||||
982 | } | ||||||||
983 | |||||||||
984 | // Return true if LI is invariant within scope of the loop. LI is invariant if | ||||||||
985 | // CurLoop is dominated by an invariant.start representing the same memory | ||||||||
986 | // location and size as the memory location LI loads from, and also the | ||||||||
987 | // invariant.start has no uses. | ||||||||
988 | static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, | ||||||||
989 | Loop *CurLoop) { | ||||||||
990 | Value *Addr = LI->getOperand(0); | ||||||||
991 | const DataLayout &DL = LI->getModule()->getDataLayout(); | ||||||||
992 | const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); | ||||||||
993 | |||||||||
994 | // It is not currently possible for clang to generate an invariant.start | ||||||||
995 | // intrinsic with scalable vector types because we don't support thread local | ||||||||
996 | // sizeless types and we don't permit sizeless types in structs or classes. | ||||||||
997 | // Furthermore, even if support is added for this in future the intrinsic | ||||||||
998 | // itself is defined to have a size of -1 for variable sized objects. This | ||||||||
999 | // makes it impossible to verify if the intrinsic envelops our region of | ||||||||
1000 | // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8> | ||||||||
1001 | // types would have a -1 parameter, but the former is clearly double the size | ||||||||
1002 | // of the latter. | ||||||||
1003 | if (LocSizeInBits.isScalable()) | ||||||||
1004 | return false; | ||||||||
1005 | |||||||||
1006 | // if the type is i8 addrspace(x)*, we know this is the type of | ||||||||
1007 | // llvm.invariant.start operand | ||||||||
1008 | auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), | ||||||||
1009 | LI->getPointerAddressSpace()); | ||||||||
1010 | unsigned BitcastsVisited = 0; | ||||||||
1011 | // Look through bitcasts until we reach the i8* type (this is invariant.start | ||||||||
1012 | // operand type). | ||||||||
1013 | while (Addr->getType() != PtrInt8Ty) { | ||||||||
1014 | auto *BC = dyn_cast<BitCastInst>(Addr); | ||||||||
1015 | // Avoid traversing high number of bitcast uses. | ||||||||
1016 | if (++BitcastsVisited > MaxNumUsesTraversed || !BC) | ||||||||
1017 | return false; | ||||||||
1018 | Addr = BC->getOperand(0); | ||||||||
1019 | } | ||||||||
1020 | |||||||||
1021 | unsigned UsesVisited = 0; | ||||||||
1022 | // Traverse all uses of the load operand value, to see if invariant.start is | ||||||||
1023 | // one of the uses, and whether it dominates the load instruction. | ||||||||
1024 | for (auto *U : Addr->users()) { | ||||||||
1025 | // Avoid traversing for Load operand with high number of users. | ||||||||
1026 | if (++UsesVisited > MaxNumUsesTraversed) | ||||||||
1027 | return false; | ||||||||
1028 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); | ||||||||
1029 | // If there are escaping uses of invariant.start instruction, the load maybe | ||||||||
1030 | // non-invariant. | ||||||||
1031 | if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || | ||||||||
1032 | !II->use_empty()) | ||||||||
1033 | continue; | ||||||||
1034 | ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0)); | ||||||||
1035 | // The intrinsic supports having a -1 argument for variable sized objects | ||||||||
1036 | // so we should check for that here. | ||||||||
1037 | if (InvariantSize->isNegative()) | ||||||||
1038 | continue; | ||||||||
1039 | uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8; | ||||||||
1040 | // Confirm the invariant.start location size contains the load operand size | ||||||||
1041 | // in bits. Also, the invariant.start should dominate the load, and we | ||||||||
1042 | // should not hoist the load out of a loop that contains this dominating | ||||||||
1043 | // invariant.start. | ||||||||
1044 | if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits && | ||||||||
1045 | DT->properlyDominates(II->getParent(), CurLoop->getHeader())) | ||||||||
1046 | return true; | ||||||||
1047 | } | ||||||||
1048 | |||||||||
1049 | return false; | ||||||||
1050 | } | ||||||||
1051 | |||||||||
1052 | namespace { | ||||||||
1053 | /// Return true if-and-only-if we know how to (mechanically) both hoist and | ||||||||
1054 | /// sink a given instruction out of a loop. Does not address legality | ||||||||
1055 | /// concerns such as aliasing or speculation safety. | ||||||||
1056 | bool isHoistableAndSinkableInst(Instruction &I) { | ||||||||
1057 | // Only these instructions are hoistable/sinkable. | ||||||||
1058 | return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) || | ||||||||
1059 | isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) || | ||||||||
1060 | isa<BinaryOperator>(I) || isa<SelectInst>(I) || | ||||||||
1061 | isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || | ||||||||
1062 | isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || | ||||||||
1063 | isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || | ||||||||
1064 | isa<InsertValueInst>(I) || isa<FreezeInst>(I)); | ||||||||
1065 | } | ||||||||
1066 | /// Return true if all of the alias sets within this AST are known not to | ||||||||
1067 | /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop. | ||||||||
1068 | bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU, | ||||||||
1069 | const Loop *L) { | ||||||||
1070 | if (CurAST) { | ||||||||
1071 | for (AliasSet &AS : *CurAST) { | ||||||||
1072 | if (!AS.isForwardingAliasSet() && AS.isMod()) { | ||||||||
1073 | return false; | ||||||||
1074 | } | ||||||||
1075 | } | ||||||||
1076 | return true; | ||||||||
1077 | } else { /*MSSAU*/ | ||||||||
1078 | for (auto *BB : L->getBlocks()) | ||||||||
1079 | if (MSSAU->getMemorySSA()->getBlockDefs(BB)) | ||||||||
1080 | return false; | ||||||||
1081 | return true; | ||||||||
1082 | } | ||||||||
1083 | } | ||||||||
1084 | |||||||||
1085 | /// Return true if I is the only Instruction with a MemoryAccess in L. | ||||||||
1086 | bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, | ||||||||
1087 | const MemorySSAUpdater *MSSAU) { | ||||||||
1088 | for (auto *BB : L->getBlocks()) | ||||||||
1089 | if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) { | ||||||||
1090 | int NotAPhi = 0; | ||||||||
1091 | for (const auto &Acc : *Accs) { | ||||||||
1092 | if (isa<MemoryPhi>(&Acc)) | ||||||||
1093 | continue; | ||||||||
1094 | const auto *MUD = cast<MemoryUseOrDef>(&Acc); | ||||||||
1095 | if (MUD->getMemoryInst() != I || NotAPhi++ == 1) | ||||||||
1096 | return false; | ||||||||
1097 | } | ||||||||
1098 | } | ||||||||
1099 | return true; | ||||||||
1100 | } | ||||||||
1101 | } | ||||||||
1102 | |||||||||
1103 | bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, | ||||||||
1104 | Loop *CurLoop, AliasSetTracker *CurAST, | ||||||||
1105 | MemorySSAUpdater *MSSAU, | ||||||||
1106 | bool TargetExecutesOncePerLoop, | ||||||||
1107 | SinkAndHoistLICMFlags *Flags, | ||||||||
1108 | OptimizationRemarkEmitter *ORE) { | ||||||||
1109 | // If we don't understand the instruction, bail early. | ||||||||
1110 | if (!isHoistableAndSinkableInst(I)) | ||||||||
| |||||||||
1111 | return false; | ||||||||
1112 | |||||||||
1113 | MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; | ||||||||
1114 | if (MSSA
| ||||||||
1115 | assert(Flags != nullptr && "Flags cannot be null.")((Flags != nullptr && "Flags cannot be null.") ? static_cast <void> (0) : __assert_fail ("Flags != nullptr && \"Flags cannot be null.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1115, __PRETTY_FUNCTION__)); | ||||||||
1116 | |||||||||
1117 | // Loads have extra constraints we have to verify before we can hoist them. | ||||||||
1118 | if (LoadInst *LI
| ||||||||
1119 | if (!LI->isUnordered()) | ||||||||
1120 | return false; // Don't sink/hoist volatile or ordered atomic loads! | ||||||||
1121 | |||||||||
1122 | // Loads from constant memory are always safe to move, even if they end up | ||||||||
1123 | // in the same alias set as something that ends up being modified. | ||||||||
1124 | if (AA->pointsToConstantMemory(LI->getOperand(0))) | ||||||||
1125 | return true; | ||||||||
1126 | if (LI->hasMetadata(LLVMContext::MD_invariant_load)) | ||||||||
1127 | return true; | ||||||||
1128 | |||||||||
1129 | if (LI->isAtomic() && !TargetExecutesOncePerLoop) | ||||||||
1130 | return false; // Don't risk duplicating unordered loads | ||||||||
1131 | |||||||||
1132 | // This checks for an invariant.start dominating the load. | ||||||||
1133 | if (isLoadInvariantInLoop(LI, DT, CurLoop)) | ||||||||
1134 | return true; | ||||||||
1135 | |||||||||
1136 | bool Invalidated; | ||||||||
1137 | if (CurAST) | ||||||||
1138 | Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST, | ||||||||
1139 | CurLoop, AA); | ||||||||
1140 | else | ||||||||
1141 | Invalidated = pointerInvalidatedByLoopWithMSSA( | ||||||||
1142 | MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags); | ||||||||
1143 | // Check loop-invariant address because this may also be a sinkable load | ||||||||
1144 | // whose address is not necessarily loop-invariant. | ||||||||
1145 | if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) | ||||||||
1146 | ORE->emit([&]() { | ||||||||
1147 | return OptimizationRemarkMissed( | ||||||||
1148 | DEBUG_TYPE"licm", "LoadWithLoopInvariantAddressInvalidated", LI) | ||||||||
1149 | << "failed to move load with loop-invariant address " | ||||||||
1150 | "because the loop may invalidate its value"; | ||||||||
1151 | }); | ||||||||
1152 | |||||||||
1153 | return !Invalidated; | ||||||||
1154 | } else if (CallInst *CI
| ||||||||
1155 | // Don't sink or hoist dbg info; it's legal, but not useful. | ||||||||
1156 | if (isa<DbgInfoIntrinsic>(I)) | ||||||||
1157 | return false; | ||||||||
1158 | |||||||||
1159 | // Don't sink calls which can throw. | ||||||||
1160 | if (CI->mayThrow()) | ||||||||
1161 | return false; | ||||||||
1162 | |||||||||
1163 | using namespace PatternMatch; | ||||||||
1164 | if (match(CI, m_Intrinsic<Intrinsic::assume>())) | ||||||||
1165 | // Assumes don't actually alias anything or throw | ||||||||
1166 | return true; | ||||||||
1167 | |||||||||
1168 | if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>())) | ||||||||
1169 | // Widenable conditions don't actually alias anything or throw | ||||||||
1170 | return true; | ||||||||
1171 | |||||||||
1172 | // Handle simple cases by querying alias analysis. | ||||||||
1173 | FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); | ||||||||
1174 | if (Behavior == FMRB_DoesNotAccessMemory) | ||||||||
1175 | return true; | ||||||||
1176 | if (AAResults::onlyReadsMemory(Behavior)) { | ||||||||
1177 | // A readonly argmemonly function only reads from memory pointed to by | ||||||||
1178 | // it's arguments with arbitrary offsets. If we can prove there are no | ||||||||
1179 | // writes to this memory in the loop, we can hoist or sink. | ||||||||
1180 | if (AAResults::onlyAccessesArgPointees(Behavior)) { | ||||||||
1181 | // TODO: expand to writeable arguments | ||||||||
1182 | for (Value *Op : CI->arg_operands()) | ||||||||
1183 | if (Op->getType()->isPointerTy()) { | ||||||||
1184 | bool Invalidated; | ||||||||
1185 | if (CurAST) | ||||||||
1186 | Invalidated = pointerInvalidatedByLoop( | ||||||||
1187 | MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()), | ||||||||
1188 | CurAST, CurLoop, AA); | ||||||||
1189 | else | ||||||||
1190 | Invalidated = pointerInvalidatedByLoopWithMSSA( | ||||||||
1191 | MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, | ||||||||
| |||||||||
1192 | *Flags); | ||||||||
1193 | if (Invalidated) | ||||||||
1194 | return false; | ||||||||
1195 | } | ||||||||
1196 | return true; | ||||||||
1197 | } | ||||||||
1198 | |||||||||
1199 | // If this call only reads from memory and there are no writes to memory | ||||||||
1200 | // in the loop, we can hoist or sink the call as appropriate. | ||||||||
1201 | if (isReadOnly(CurAST, MSSAU, CurLoop)) | ||||||||
1202 | return true; | ||||||||
1203 | } | ||||||||
1204 | |||||||||
1205 | // FIXME: This should use mod/ref information to see if we can hoist or | ||||||||
1206 | // sink the call. | ||||||||
1207 | |||||||||
1208 | return false; | ||||||||
1209 | } else if (auto *FI = dyn_cast<FenceInst>(&I)) { | ||||||||
1210 | // Fences alias (most) everything to provide ordering. For the moment, | ||||||||
1211 | // just give up if there are any other memory operations in the loop. | ||||||||
1212 | if (CurAST) { | ||||||||
1213 | auto Begin = CurAST->begin(); | ||||||||
1214 | assert(Begin != CurAST->end() && "must contain FI")((Begin != CurAST->end() && "must contain FI") ? static_cast <void> (0) : __assert_fail ("Begin != CurAST->end() && \"must contain FI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1214, __PRETTY_FUNCTION__)); | ||||||||
1215 | if (std::next(Begin) != CurAST->end()) | ||||||||
1216 | // constant memory for instance, TODO: handle better | ||||||||
1217 | return false; | ||||||||
1218 | auto *UniqueI = Begin->getUniqueInstruction(); | ||||||||
1219 | if (!UniqueI) | ||||||||
1220 | // other memory op, give up | ||||||||
1221 | return false; | ||||||||
1222 | (void)FI; // suppress unused variable warning | ||||||||
1223 | assert(UniqueI == FI && "AS must contain FI")((UniqueI == FI && "AS must contain FI") ? static_cast <void> (0) : __assert_fail ("UniqueI == FI && \"AS must contain FI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1223, __PRETTY_FUNCTION__)); | ||||||||
1224 | return true; | ||||||||
1225 | } else // MSSAU | ||||||||
1226 | return isOnlyMemoryAccess(FI, CurLoop, MSSAU); | ||||||||
1227 | } else if (auto *SI = dyn_cast<StoreInst>(&I)) { | ||||||||
1228 | if (!SI->isUnordered()) | ||||||||
1229 | return false; // Don't sink/hoist volatile or ordered atomic store! | ||||||||
1230 | |||||||||
1231 | // We can only hoist a store that we can prove writes a value which is not | ||||||||
1232 | // read or overwritten within the loop. For those cases, we fallback to | ||||||||
1233 | // load store promotion instead. TODO: We can extend this to cases where | ||||||||
1234 | // there is exactly one write to the location and that write dominates an | ||||||||
1235 | // arbitrary number of reads in the loop. | ||||||||
1236 | if (CurAST) { | ||||||||
1237 | auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); | ||||||||
1238 | |||||||||
1239 | if (AS.isRef() || !AS.isMustAlias()) | ||||||||
1240 | // Quick exit test, handled by the full path below as well. | ||||||||
1241 | return false; | ||||||||
1242 | auto *UniqueI = AS.getUniqueInstruction(); | ||||||||
1243 | if (!UniqueI) | ||||||||
1244 | // other memory op, give up | ||||||||
1245 | return false; | ||||||||
1246 | assert(UniqueI == SI && "AS must contain SI")((UniqueI == SI && "AS must contain SI") ? static_cast <void> (0) : __assert_fail ("UniqueI == SI && \"AS must contain SI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1246, __PRETTY_FUNCTION__)); | ||||||||
1247 | return true; | ||||||||
1248 | } else { // MSSAU | ||||||||
1249 | if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) | ||||||||
1250 | return true; | ||||||||
1251 | // If there are more accesses than the Promotion cap, give up, we're not | ||||||||
1252 | // walking a list that long. | ||||||||
1253 | if (Flags->NoOfMemAccTooLarge) | ||||||||
1254 | return false; | ||||||||
1255 | // Check store only if there's still "quota" to check clobber. | ||||||||
1256 | if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap) | ||||||||
1257 | return false; | ||||||||
1258 | // If there are interfering Uses (i.e. their defining access is in the | ||||||||
1259 | // loop), or ordered loads (stored as Defs!), don't move this store. | ||||||||
1260 | // Could do better here, but this is conservatively correct. | ||||||||
1261 | // TODO: Cache set of Uses on the first walk in runOnLoop, update when | ||||||||
1262 | // moving accesses. Can also extend to dominating uses. | ||||||||
1263 | auto *SIMD = MSSA->getMemoryAccess(SI); | ||||||||
1264 | for (auto *BB : CurLoop->getBlocks()) | ||||||||
1265 | if (auto *Accesses = MSSA->getBlockAccesses(BB)) { | ||||||||
1266 | for (const auto &MA : *Accesses) | ||||||||
1267 | if (const auto *MU = dyn_cast<MemoryUse>(&MA)) { | ||||||||
1268 | auto *MD = MU->getDefiningAccess(); | ||||||||
1269 | if (!MSSA->isLiveOnEntryDef(MD) && | ||||||||
1270 | CurLoop->contains(MD->getBlock())) | ||||||||
1271 | return false; | ||||||||
1272 | // Disable hoisting past potentially interfering loads. Optimized | ||||||||
1273 | // Uses may point to an access outside the loop, as getClobbering | ||||||||
1274 | // checks the previous iteration when walking the backedge. | ||||||||
1275 | // FIXME: More precise: no Uses that alias SI. | ||||||||
1276 | if (!Flags->IsSink && !MSSA->dominates(SIMD, MU)) | ||||||||
1277 | return false; | ||||||||
1278 | } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) { | ||||||||
1279 | if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) { | ||||||||
1280 | (void)LI; // Silence warning. | ||||||||
1281 | assert(!LI->isUnordered() && "Expected unordered load")((!LI->isUnordered() && "Expected unordered load") ? static_cast<void> (0) : __assert_fail ("!LI->isUnordered() && \"Expected unordered load\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1281, __PRETTY_FUNCTION__)); | ||||||||
1282 | return false; | ||||||||
1283 | } | ||||||||
1284 | // Any call, while it may not be clobbering SI, it may be a use. | ||||||||
1285 | if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) { | ||||||||
1286 | // Check if the call may read from the memory locattion written | ||||||||
1287 | // to by SI. Check CI's attributes and arguments; the number of | ||||||||
1288 | // such checks performed is limited above by NoOfMemAccTooLarge. | ||||||||
1289 | ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI)); | ||||||||
1290 | if (isModOrRefSet(MRI)) | ||||||||
1291 | return false; | ||||||||
1292 | } | ||||||||
1293 | } | ||||||||
1294 | } | ||||||||
1295 | |||||||||
1296 | auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); | ||||||||
1297 | Flags->LicmMssaOptCounter++; | ||||||||
1298 | // If there are no clobbering Defs in the loop, store is safe to hoist. | ||||||||
1299 | return MSSA->isLiveOnEntryDef(Source) || | ||||||||
1300 | !CurLoop->contains(Source->getBlock()); | ||||||||
1301 | } | ||||||||
1302 | } | ||||||||
1303 | |||||||||
1304 | assert(!I.mayReadOrWriteMemory() && "unhandled aliasing")((!I.mayReadOrWriteMemory() && "unhandled aliasing") ? static_cast<void> (0) : __assert_fail ("!I.mayReadOrWriteMemory() && \"unhandled aliasing\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1304, __PRETTY_FUNCTION__)); | ||||||||
1305 | |||||||||
1306 | // We've established mechanical ability and aliasing, it's up to the caller | ||||||||
1307 | // to check fault safety | ||||||||
1308 | return true; | ||||||||
1309 | } | ||||||||
1310 | |||||||||
1311 | /// Returns true if a PHINode is a trivially replaceable with an | ||||||||
1312 | /// Instruction. | ||||||||
1313 | /// This is true when all incoming values are that instruction. | ||||||||
1314 | /// This pattern occurs most often with LCSSA PHI nodes. | ||||||||
1315 | /// | ||||||||
1316 | static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) { | ||||||||
1317 | for (const Value *IncValue : PN.incoming_values()) | ||||||||
1318 | if (IncValue != &I) | ||||||||
1319 | return false; | ||||||||
1320 | |||||||||
1321 | return true; | ||||||||
1322 | } | ||||||||
1323 | |||||||||
1324 | /// Return true if the instruction is free in the loop. | ||||||||
1325 | static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, | ||||||||
1326 | const TargetTransformInfo *TTI) { | ||||||||
1327 | |||||||||
1328 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { | ||||||||
1329 | if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) != | ||||||||
1330 | TargetTransformInfo::TCC_Free) | ||||||||
1331 | return false; | ||||||||
1332 | // For a GEP, we cannot simply use getUserCost because currently it | ||||||||
1333 | // optimistically assume that a GEP will fold into addressing mode | ||||||||
1334 | // regardless of its users. | ||||||||
1335 | const BasicBlock *BB = GEP->getParent(); | ||||||||
1336 | for (const User *U : GEP->users()) { | ||||||||
1337 | const Instruction *UI = cast<Instruction>(U); | ||||||||
1338 | if (CurLoop->contains(UI) && | ||||||||
1339 | (BB != UI->getParent() || | ||||||||
1340 | (!isa<StoreInst>(UI) && !isa<LoadInst>(UI)))) | ||||||||
1341 | return false; | ||||||||
1342 | } | ||||||||
1343 | return true; | ||||||||
1344 | } else | ||||||||
1345 | return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == | ||||||||
1346 | TargetTransformInfo::TCC_Free; | ||||||||
1347 | } | ||||||||
1348 | |||||||||
1349 | /// Return true if the only users of this instruction are outside of | ||||||||
1350 | /// the loop. If this is true, we can sink the instruction to the exit | ||||||||
1351 | /// blocks of the loop. | ||||||||
1352 | /// | ||||||||
1353 | /// We also return true if the instruction could be folded away in lowering. | ||||||||
1354 | /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). | ||||||||
1355 | static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, | ||||||||
1356 | const LoopSafetyInfo *SafetyInfo, | ||||||||
1357 | TargetTransformInfo *TTI, bool &FreeInLoop) { | ||||||||
1358 | const auto &BlockColors = SafetyInfo->getBlockColors(); | ||||||||
1359 | bool IsFree = isFreeInLoop(I, CurLoop, TTI); | ||||||||
1360 | for (const User *U : I.users()) { | ||||||||
1361 | const Instruction *UI = cast<Instruction>(U); | ||||||||
1362 | if (const PHINode *PN = dyn_cast<PHINode>(UI)) { | ||||||||
1363 | const BasicBlock *BB = PN->getParent(); | ||||||||
1364 | // We cannot sink uses in catchswitches. | ||||||||
1365 | if (isa<CatchSwitchInst>(BB->getTerminator())) | ||||||||
1366 | return false; | ||||||||
1367 | |||||||||
1368 | // We need to sink a callsite to a unique funclet. Avoid sinking if the | ||||||||
1369 | // phi use is too muddled. | ||||||||
1370 | if (isa<CallInst>(I)) | ||||||||
1371 | if (!BlockColors.empty() && | ||||||||
1372 | BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) | ||||||||
1373 | return false; | ||||||||
1374 | } | ||||||||
1375 | |||||||||
1376 | if (CurLoop->contains(UI)) { | ||||||||
1377 | if (IsFree) { | ||||||||
1378 | FreeInLoop = true; | ||||||||
1379 | continue; | ||||||||
1380 | } | ||||||||
1381 | return false; | ||||||||
1382 | } | ||||||||
1383 | } | ||||||||
1384 | return true; | ||||||||
1385 | } | ||||||||
1386 | |||||||||
1387 | static Instruction *cloneInstructionInExitBlock( | ||||||||
1388 | Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, | ||||||||
1389 | const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) { | ||||||||
1390 | Instruction *New; | ||||||||
1391 | if (auto *CI = dyn_cast<CallInst>(&I)) { | ||||||||
1392 | const auto &BlockColors = SafetyInfo->getBlockColors(); | ||||||||
1393 | |||||||||
1394 | // Sinking call-sites need to be handled differently from other | ||||||||
1395 | // instructions. The cloned call-site needs a funclet bundle operand | ||||||||
1396 | // appropriate for its location in the CFG. | ||||||||
1397 | SmallVector<OperandBundleDef, 1> OpBundles; | ||||||||
1398 | for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); | ||||||||
1399 | BundleIdx != BundleEnd; ++BundleIdx) { | ||||||||
1400 | OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); | ||||||||
1401 | if (Bundle.getTagID() == LLVMContext::OB_funclet) | ||||||||
1402 | continue; | ||||||||
1403 | |||||||||
1404 | OpBundles.emplace_back(Bundle); | ||||||||
1405 | } | ||||||||
1406 | |||||||||
1407 | if (!BlockColors.empty()) { | ||||||||
1408 | const ColorVector &CV = BlockColors.find(&ExitBlock)->second; | ||||||||
1409 | assert(CV.size() == 1 && "non-unique color for exit block!")((CV.size() == 1 && "non-unique color for exit block!" ) ? static_cast<void> (0) : __assert_fail ("CV.size() == 1 && \"non-unique color for exit block!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1409, __PRETTY_FUNCTION__)); | ||||||||
1410 | BasicBlock *BBColor = CV.front(); | ||||||||
1411 | Instruction *EHPad = BBColor->getFirstNonPHI(); | ||||||||
1412 | if (EHPad->isEHPad()) | ||||||||
1413 | OpBundles.emplace_back("funclet", EHPad); | ||||||||
1414 | } | ||||||||
1415 | |||||||||
1416 | New = CallInst::Create(CI, OpBundles); | ||||||||
1417 | } else { | ||||||||
1418 | New = I.clone(); | ||||||||
1419 | } | ||||||||
1420 | |||||||||
1421 | ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); | ||||||||
1422 | if (!I.getName().empty()) | ||||||||
1423 | New->setName(I.getName() + ".le"); | ||||||||
1424 | |||||||||
1425 | if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { | ||||||||
1426 | // Create a new MemoryAccess and let MemorySSA set its defining access. | ||||||||
1427 | MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( | ||||||||
1428 | New, nullptr, New->getParent(), MemorySSA::Beginning); | ||||||||
1429 | if (NewMemAcc) { | ||||||||
1430 | if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc)) | ||||||||
1431 | MSSAU->insertDef(MemDef, /*RenameUses=*/true); | ||||||||
1432 | else { | ||||||||
1433 | auto *MemUse = cast<MemoryUse>(NewMemAcc); | ||||||||
1434 | MSSAU->insertUse(MemUse, /*RenameUses=*/true); | ||||||||
1435 | } | ||||||||
1436 | } | ||||||||
1437 | } | ||||||||
1438 | |||||||||
1439 | // Build LCSSA PHI nodes for any in-loop operands. Note that this is | ||||||||
1440 | // particularly cheap because we can rip off the PHI node that we're | ||||||||
1441 | // replacing for the number and blocks of the predecessors. | ||||||||
1442 | // OPT: If this shows up in a profile, we can instead finish sinking all | ||||||||
1443 | // invariant instructions, and then walk their operands to re-establish | ||||||||
1444 | // LCSSA. That will eliminate creating PHI nodes just to nuke them when | ||||||||
1445 | // sinking bottom-up. | ||||||||
1446 | for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; | ||||||||
1447 | ++OI) | ||||||||
1448 | if (Instruction *OInst = dyn_cast<Instruction>(*OI)) | ||||||||
1449 | if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) | ||||||||
1450 | if (!OLoop->contains(&PN)) { | ||||||||
1451 | PHINode *OpPN = | ||||||||
1452 | PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), | ||||||||
1453 | OInst->getName() + ".lcssa", &ExitBlock.front()); | ||||||||
1454 | for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) | ||||||||
1455 | OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); | ||||||||
1456 | *OI = OpPN; | ||||||||
1457 | } | ||||||||
1458 | return New; | ||||||||
1459 | } | ||||||||
1460 | |||||||||
1461 | static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, | ||||||||
1462 | AliasSetTracker *AST, MemorySSAUpdater *MSSAU) { | ||||||||
1463 | if (AST) | ||||||||
1464 | AST->deleteValue(&I); | ||||||||
1465 | if (MSSAU) | ||||||||
1466 | MSSAU->removeMemoryAccess(&I); | ||||||||
1467 | SafetyInfo.removeInstruction(&I); | ||||||||
1468 | I.eraseFromParent(); | ||||||||
1469 | } | ||||||||
1470 | |||||||||
1471 | static void moveInstructionBefore(Instruction &I, Instruction &Dest, | ||||||||
1472 | ICFLoopSafetyInfo &SafetyInfo, | ||||||||
1473 | MemorySSAUpdater *MSSAU, | ||||||||
1474 | ScalarEvolution *SE) { | ||||||||
1475 | SafetyInfo.removeInstruction(&I); | ||||||||
1476 | SafetyInfo.insertInstructionTo(&I, Dest.getParent()); | ||||||||
1477 | I.moveBefore(&Dest); | ||||||||
1478 | if (MSSAU) | ||||||||
1479 | if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>( | ||||||||
1480 | MSSAU->getMemorySSA()->getMemoryAccess(&I))) | ||||||||
1481 | MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), | ||||||||
1482 | MemorySSA::BeforeTerminator); | ||||||||
1483 | if (SE) | ||||||||
1484 | SE->forgetValue(&I); | ||||||||
1485 | } | ||||||||
1486 | |||||||||
1487 | static Instruction *sinkThroughTriviallyReplaceablePHI( | ||||||||
1488 | PHINode *TPN, Instruction *I, LoopInfo *LI, | ||||||||
1489 | SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, | ||||||||
1490 | const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, | ||||||||
1491 | MemorySSAUpdater *MSSAU) { | ||||||||
1492 | assert(isTriviallyReplaceablePHI(*TPN, *I) &&((isTriviallyReplaceablePHI(*TPN, *I) && "Expect only trivially replaceable PHI" ) ? static_cast<void> (0) : __assert_fail ("isTriviallyReplaceablePHI(*TPN, *I) && \"Expect only trivially replaceable PHI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1493, __PRETTY_FUNCTION__)) | ||||||||
1493 | "Expect only trivially replaceable PHI")((isTriviallyReplaceablePHI(*TPN, *I) && "Expect only trivially replaceable PHI" ) ? static_cast<void> (0) : __assert_fail ("isTriviallyReplaceablePHI(*TPN, *I) && \"Expect only trivially replaceable PHI\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1493, __PRETTY_FUNCTION__)); | ||||||||
1494 | BasicBlock *ExitBlock = TPN->getParent(); | ||||||||
1495 | Instruction *New; | ||||||||
1496 | auto It = SunkCopies.find(ExitBlock); | ||||||||
1497 | if (It != SunkCopies.end()) | ||||||||
1498 | New = It->second; | ||||||||
1499 | else | ||||||||
1500 | New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock( | ||||||||
1501 | *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU); | ||||||||
1502 | return New; | ||||||||
1503 | } | ||||||||
1504 | |||||||||
1505 | static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { | ||||||||
1506 | BasicBlock *BB = PN->getParent(); | ||||||||
1507 | if (!BB->canSplitPredecessors()) | ||||||||
1508 | return false; | ||||||||
1509 | // It's not impossible to split EHPad blocks, but if BlockColors already exist | ||||||||
1510 | // it require updating BlockColors for all offspring blocks accordingly. By | ||||||||
1511 | // skipping such corner case, we can make updating BlockColors after splitting | ||||||||
1512 | // predecessor fairly simple. | ||||||||
1513 | if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad()) | ||||||||
1514 | return false; | ||||||||
1515 | for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { | ||||||||
1516 | BasicBlock *BBPred = *PI; | ||||||||
1517 | if (isa<IndirectBrInst>(BBPred->getTerminator()) || | ||||||||
1518 | isa<CallBrInst>(BBPred->getTerminator())) | ||||||||
1519 | return false; | ||||||||
1520 | } | ||||||||
1521 | return true; | ||||||||
1522 | } | ||||||||
1523 | |||||||||
1524 | static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, | ||||||||
1525 | LoopInfo *LI, const Loop *CurLoop, | ||||||||
1526 | LoopSafetyInfo *SafetyInfo, | ||||||||
1527 | MemorySSAUpdater *MSSAU) { | ||||||||
1528 | #ifndef NDEBUG | ||||||||
1529 | SmallVector<BasicBlock *, 32> ExitBlocks; | ||||||||
1530 | CurLoop->getUniqueExitBlocks(ExitBlocks); | ||||||||
1531 | SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), | ||||||||
1532 | ExitBlocks.end()); | ||||||||
1533 | #endif | ||||||||
1534 | BasicBlock *ExitBB = PN->getParent(); | ||||||||
1535 | assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.")((ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block." ) ? static_cast<void> (0) : __assert_fail ("ExitBlockSet.count(ExitBB) && \"Expect the PHI is in an exit block.\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1535, __PRETTY_FUNCTION__)); | ||||||||
1536 | |||||||||
1537 | // Split predecessors of the loop exit to make instructions in the loop are | ||||||||
1538 | // exposed to exit blocks through trivially replaceable PHIs while keeping the | ||||||||
1539 | // loop in the canonical form where each predecessor of each exit block should | ||||||||
1540 | // be contained within the loop. For example, this will convert the loop below | ||||||||
1541 | // from | ||||||||
1542 | // | ||||||||
1543 | // LB1: | ||||||||
1544 | // %v1 = | ||||||||
1545 | // br %LE, %LB2 | ||||||||
1546 | // LB2: | ||||||||
1547 | // %v2 = | ||||||||
1548 | // br %LE, %LB1 | ||||||||
1549 | // LE: | ||||||||
1550 | // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable | ||||||||
1551 | // | ||||||||
1552 | // to | ||||||||
1553 | // | ||||||||
1554 | // LB1: | ||||||||
1555 | // %v1 = | ||||||||
1556 | // br %LE.split, %LB2 | ||||||||
1557 | // LB2: | ||||||||
1558 | // %v2 = | ||||||||
1559 | // br %LE.split2, %LB1 | ||||||||
1560 | // LE.split: | ||||||||
1561 | // %p1 = phi [%v1, %LB1] <-- trivially replaceable | ||||||||
1562 | // br %LE | ||||||||
1563 | // LE.split2: | ||||||||
1564 | // %p2 = phi [%v2, %LB2] <-- trivially replaceable | ||||||||
1565 | // br %LE | ||||||||
1566 | // LE: | ||||||||
1567 | // %p = phi [%p1, %LE.split], [%p2, %LE.split2] | ||||||||
1568 | // | ||||||||
1569 | const auto &BlockColors = SafetyInfo->getBlockColors(); | ||||||||
1570 | SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); | ||||||||
1571 | while (!PredBBs.empty()) { | ||||||||
1572 | BasicBlock *PredBB = *PredBBs.begin(); | ||||||||
1573 | assert(CurLoop->contains(PredBB) &&((CurLoop->contains(PredBB) && "Expect all predecessors are in the loop" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->contains(PredBB) && \"Expect all predecessors are in the loop\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1574, __PRETTY_FUNCTION__)) | ||||||||
1574 | "Expect all predecessors are in the loop")((CurLoop->contains(PredBB) && "Expect all predecessors are in the loop" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->contains(PredBB) && \"Expect all predecessors are in the loop\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1574, __PRETTY_FUNCTION__)); | ||||||||
1575 | if (PN->getBasicBlockIndex(PredBB) >= 0) { | ||||||||
1576 | BasicBlock *NewPred = SplitBlockPredecessors( | ||||||||
1577 | ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true); | ||||||||
1578 | // Since we do not allow splitting EH-block with BlockColors in | ||||||||
1579 | // canSplitPredecessors(), we can simply assign predecessor's color to | ||||||||
1580 | // the new block. | ||||||||
1581 | if (!BlockColors.empty()) | ||||||||
1582 | // Grab a reference to the ColorVector to be inserted before getting the | ||||||||
1583 | // reference to the vector we are copying because inserting the new | ||||||||
1584 | // element in BlockColors might cause the map to be reallocated. | ||||||||
1585 | SafetyInfo->copyColors(NewPred, PredBB); | ||||||||
1586 | } | ||||||||
1587 | PredBBs.remove(PredBB); | ||||||||
1588 | } | ||||||||
1589 | } | ||||||||
1590 | |||||||||
1591 | /// When an instruction is found to only be used outside of the loop, this | ||||||||
1592 | /// function moves it to the exit blocks and patches up SSA form as needed. | ||||||||
1593 | /// This method is guaranteed to remove the original instruction from its | ||||||||
1594 | /// position, and may either delete it or move it to outside of the loop. | ||||||||
1595 | /// | ||||||||
1596 | static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, | ||||||||
1597 | BlockFrequencyInfo *BFI, const Loop *CurLoop, | ||||||||
1598 | ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, | ||||||||
1599 | OptimizationRemarkEmitter *ORE) { | ||||||||
1600 | LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM sinking instruction: " << I << "\n"; } } while (false); | ||||||||
1601 | ORE->emit([&]() { | ||||||||
1602 | return OptimizationRemark(DEBUG_TYPE"licm", "InstSunk", &I) | ||||||||
1603 | << "sinking " << ore::NV("Inst", &I); | ||||||||
1604 | }); | ||||||||
1605 | bool Changed = false; | ||||||||
1606 | if (isa<LoadInst>(I)) | ||||||||
1607 | ++NumMovedLoads; | ||||||||
1608 | else if (isa<CallInst>(I)) | ||||||||
1609 | ++NumMovedCalls; | ||||||||
1610 | ++NumSunk; | ||||||||
1611 | |||||||||
1612 | // Iterate over users to be ready for actual sinking. Replace users via | ||||||||
1613 | // unreachable blocks with undef and make all user PHIs trivially replaceable. | ||||||||
1614 | SmallPtrSet<Instruction *, 8> VisitedUsers; | ||||||||
1615 | for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { | ||||||||
1616 | auto *User = cast<Instruction>(*UI); | ||||||||
1617 | Use &U = UI.getUse(); | ||||||||
1618 | ++UI; | ||||||||
1619 | |||||||||
1620 | if (VisitedUsers.count(User) || CurLoop->contains(User)) | ||||||||
1621 | continue; | ||||||||
1622 | |||||||||
1623 | if (!DT->isReachableFromEntry(User->getParent())) { | ||||||||
1624 | U = UndefValue::get(I.getType()); | ||||||||
1625 | Changed = true; | ||||||||
1626 | continue; | ||||||||
1627 | } | ||||||||
1628 | |||||||||
1629 | // The user must be a PHI node. | ||||||||
1630 | PHINode *PN = cast<PHINode>(User); | ||||||||
1631 | |||||||||
1632 | // Surprisingly, instructions can be used outside of loops without any | ||||||||
1633 | // exits. This can only happen in PHI nodes if the incoming block is | ||||||||
1634 | // unreachable. | ||||||||
1635 | BasicBlock *BB = PN->getIncomingBlock(U); | ||||||||
1636 | if (!DT->isReachableFromEntry(BB)) { | ||||||||
1637 | U = UndefValue::get(I.getType()); | ||||||||
1638 | Changed = true; | ||||||||
1639 | continue; | ||||||||
1640 | } | ||||||||
1641 | |||||||||
1642 | VisitedUsers.insert(PN); | ||||||||
1643 | if (isTriviallyReplaceablePHI(*PN, I)) | ||||||||
1644 | continue; | ||||||||
1645 | |||||||||
1646 | if (!canSplitPredecessors(PN, SafetyInfo)) | ||||||||
1647 | return Changed; | ||||||||
1648 | |||||||||
1649 | // Split predecessors of the PHI so that we can make users trivially | ||||||||
1650 | // replaceable. | ||||||||
1651 | splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU); | ||||||||
1652 | |||||||||
1653 | // Should rebuild the iterators, as they may be invalidated by | ||||||||
1654 | // splitPredecessorsOfLoopExit(). | ||||||||
1655 | UI = I.user_begin(); | ||||||||
1656 | UE = I.user_end(); | ||||||||
1657 | } | ||||||||
1658 | |||||||||
1659 | if (VisitedUsers.empty()) | ||||||||
1660 | return Changed; | ||||||||
1661 | |||||||||
1662 | #ifndef NDEBUG | ||||||||
1663 | SmallVector<BasicBlock *, 32> ExitBlocks; | ||||||||
1664 | CurLoop->getUniqueExitBlocks(ExitBlocks); | ||||||||
1665 | SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), | ||||||||
1666 | ExitBlocks.end()); | ||||||||
1667 | #endif | ||||||||
1668 | |||||||||
1669 | // Clones of this instruction. Don't create more than one per exit block! | ||||||||
1670 | SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; | ||||||||
1671 | |||||||||
1672 | // If this instruction is only used outside of the loop, then all users are | ||||||||
1673 | // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of | ||||||||
1674 | // the instruction. | ||||||||
1675 | // First check if I is worth sinking for all uses. Sink only when it is worth | ||||||||
1676 | // across all uses. | ||||||||
1677 | SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end()); | ||||||||
1678 | SmallVector<PHINode *, 8> ExitPNs; | ||||||||
1679 | for (auto *UI : Users) { | ||||||||
1680 | auto *User = cast<Instruction>(UI); | ||||||||
1681 | |||||||||
1682 | if (CurLoop->contains(User)) | ||||||||
1683 | continue; | ||||||||
1684 | |||||||||
1685 | PHINode *PN = cast<PHINode>(User); | ||||||||
1686 | assert(ExitBlockSet.count(PN->getParent()) &&((ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!" ) ? static_cast<void> (0) : __assert_fail ("ExitBlockSet.count(PN->getParent()) && \"The LCSSA PHI is not in an exit block!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1687, __PRETTY_FUNCTION__)) | ||||||||
1687 | "The LCSSA PHI is not in an exit block!")((ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!" ) ? static_cast<void> (0) : __assert_fail ("ExitBlockSet.count(PN->getParent()) && \"The LCSSA PHI is not in an exit block!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1687, __PRETTY_FUNCTION__)); | ||||||||
1688 | if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) { | ||||||||
1689 | return Changed; | ||||||||
1690 | } | ||||||||
1691 | |||||||||
1692 | ExitPNs.push_back(PN); | ||||||||
1693 | } | ||||||||
1694 | |||||||||
1695 | for (auto *PN : ExitPNs) { | ||||||||
1696 | |||||||||
1697 | // The PHI must be trivially replaceable. | ||||||||
1698 | Instruction *New = sinkThroughTriviallyReplaceablePHI( | ||||||||
1699 | PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); | ||||||||
1700 | PN->replaceAllUsesWith(New); | ||||||||
1701 | eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr); | ||||||||
1702 | Changed = true; | ||||||||
1703 | } | ||||||||
1704 | return Changed; | ||||||||
1705 | } | ||||||||
1706 | |||||||||
1707 | /// When an instruction is found to only use loop invariant operands that | ||||||||
1708 | /// is safe to hoist, this instruction is called to do the dirty work. | ||||||||
1709 | /// | ||||||||
1710 | static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, | ||||||||
1711 | BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, | ||||||||
1712 | MemorySSAUpdater *MSSAU, ScalarEvolution *SE, | ||||||||
1713 | OptimizationRemarkEmitter *ORE) { | ||||||||
1714 | LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << Ido { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM hoisting to " << Dest ->getName() << ": " << I << "\n"; } } while (false) | ||||||||
1715 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM hoisting to " << Dest ->getName() << ": " << I << "\n"; } } while (false); | ||||||||
1716 | ORE->emit([&]() { | ||||||||
1717 | return OptimizationRemark(DEBUG_TYPE"licm", "Hoisted", &I) << "hoisting " | ||||||||
1718 | << ore::NV("Inst", &I); | ||||||||
1719 | }); | ||||||||
1720 | |||||||||
1721 | // Metadata can be dependent on conditions we are hoisting above. | ||||||||
1722 | // Conservatively strip all metadata on the instruction unless we were | ||||||||
1723 | // guaranteed to execute I if we entered the loop, in which case the metadata | ||||||||
1724 | // is valid in the loop preheader. | ||||||||
1725 | if (I.hasMetadataOtherThanDebugLoc() && | ||||||||
1726 | // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning | ||||||||
1727 | // time in isGuaranteedToExecute if we don't actually have anything to | ||||||||
1728 | // drop. It is a compile time optimization, not required for correctness. | ||||||||
1729 | !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) | ||||||||
1730 | I.dropUnknownNonDebugMetadata(); | ||||||||
1731 | |||||||||
1732 | if (isa<PHINode>(I)) | ||||||||
1733 | // Move the new node to the end of the phi list in the destination block. | ||||||||
1734 | moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE); | ||||||||
1735 | else | ||||||||
1736 | // Move the new node to the destination block, before its terminator. | ||||||||
1737 | moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE); | ||||||||
1738 | |||||||||
1739 | I.updateLocationAfterHoist(); | ||||||||
1740 | |||||||||
1741 | if (isa<LoadInst>(I)) | ||||||||
1742 | ++NumMovedLoads; | ||||||||
1743 | else if (isa<CallInst>(I)) | ||||||||
1744 | ++NumMovedCalls; | ||||||||
1745 | ++NumHoisted; | ||||||||
1746 | } | ||||||||
1747 | |||||||||
1748 | /// Only sink or hoist an instruction if it is not a trapping instruction, | ||||||||
1749 | /// or if the instruction is known not to trap when moved to the preheader. | ||||||||
1750 | /// or if it is a trapping instruction and is guaranteed to execute. | ||||||||
1751 | static bool isSafeToExecuteUnconditionally(Instruction &Inst, | ||||||||
1752 | const DominatorTree *DT, | ||||||||
1753 | const Loop *CurLoop, | ||||||||
1754 | const LoopSafetyInfo *SafetyInfo, | ||||||||
1755 | OptimizationRemarkEmitter *ORE, | ||||||||
1756 | const Instruction *CtxI) { | ||||||||
1757 | if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) | ||||||||
1758 | return true; | ||||||||
1759 | |||||||||
1760 | bool GuaranteedToExecute = | ||||||||
1761 | SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop); | ||||||||
1762 | |||||||||
1763 | if (!GuaranteedToExecute) { | ||||||||
1764 | auto *LI = dyn_cast<LoadInst>(&Inst); | ||||||||
1765 | if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) | ||||||||
1766 | ORE->emit([&]() { | ||||||||
1767 | return OptimizationRemarkMissed( | ||||||||
1768 | DEBUG_TYPE"licm", "LoadWithLoopInvariantAddressCondExecuted", LI) | ||||||||
1769 | << "failed to hoist load with loop-invariant address " | ||||||||
1770 | "because load is conditionally executed"; | ||||||||
1771 | }); | ||||||||
1772 | } | ||||||||
1773 | |||||||||
1774 | return GuaranteedToExecute; | ||||||||
1775 | } | ||||||||
1776 | |||||||||
1777 | namespace { | ||||||||
1778 | class LoopPromoter : public LoadAndStorePromoter { | ||||||||
1779 | Value *SomePtr; // Designated pointer to store to. | ||||||||
1780 | const SmallSetVector<Value *, 8> &PointerMustAliases; | ||||||||
1781 | SmallVectorImpl<BasicBlock *> &LoopExitBlocks; | ||||||||
1782 | SmallVectorImpl<Instruction *> &LoopInsertPts; | ||||||||
1783 | SmallVectorImpl<MemoryAccess *> &MSSAInsertPts; | ||||||||
1784 | PredIteratorCache &PredCache; | ||||||||
1785 | AliasSetTracker *AST; | ||||||||
1786 | MemorySSAUpdater *MSSAU; | ||||||||
1787 | LoopInfo &LI; | ||||||||
1788 | DebugLoc DL; | ||||||||
1789 | int Alignment; | ||||||||
1790 | bool UnorderedAtomic; | ||||||||
1791 | AAMDNodes AATags; | ||||||||
1792 | ICFLoopSafetyInfo &SafetyInfo; | ||||||||
1793 | |||||||||
1794 | Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { | ||||||||
1795 | if (Instruction *I = dyn_cast<Instruction>(V)) | ||||||||
1796 | if (Loop *L = LI.getLoopFor(I->getParent())) | ||||||||
1797 | if (!L->contains(BB)) { | ||||||||
1798 | // We need to create an LCSSA PHI node for the incoming value and | ||||||||
1799 | // store that. | ||||||||
1800 | PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), | ||||||||
1801 | I->getName() + ".lcssa", &BB->front()); | ||||||||
1802 | for (BasicBlock *Pred : PredCache.get(BB)) | ||||||||
1803 | PN->addIncoming(I, Pred); | ||||||||
1804 | return PN; | ||||||||
1805 | } | ||||||||
1806 | return V; | ||||||||
1807 | } | ||||||||
1808 | |||||||||
1809 | public: | ||||||||
1810 | LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, | ||||||||
1811 | const SmallSetVector<Value *, 8> &PMA, | ||||||||
1812 | SmallVectorImpl<BasicBlock *> &LEB, | ||||||||
1813 | SmallVectorImpl<Instruction *> &LIP, | ||||||||
1814 | SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC, | ||||||||
1815 | AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li, | ||||||||
1816 | DebugLoc dl, int alignment, bool UnorderedAtomic, | ||||||||
1817 | const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo) | ||||||||
1818 | : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), | ||||||||
1819 | LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), | ||||||||
1820 | PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)), | ||||||||
1821 | Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), | ||||||||
1822 | SafetyInfo(SafetyInfo) {} | ||||||||
1823 | |||||||||
1824 | bool isInstInList(Instruction *I, | ||||||||
1825 | const SmallVectorImpl<Instruction *> &) const override { | ||||||||
1826 | Value *Ptr; | ||||||||
1827 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) | ||||||||
1828 | Ptr = LI->getOperand(0); | ||||||||
1829 | else | ||||||||
1830 | Ptr = cast<StoreInst>(I)->getPointerOperand(); | ||||||||
1831 | return PointerMustAliases.count(Ptr); | ||||||||
1832 | } | ||||||||
1833 | |||||||||
1834 | void doExtraRewritesBeforeFinalDeletion() override { | ||||||||
1835 | // Insert stores after in the loop exit blocks. Each exit block gets a | ||||||||
1836 | // store of the live-out values that feed them. Since we've already told | ||||||||
1837 | // the SSA updater about the defs in the loop and the preheader | ||||||||
1838 | // definition, it is all set and we can start using it. | ||||||||
1839 | for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { | ||||||||
1840 | BasicBlock *ExitBlock = LoopExitBlocks[i]; | ||||||||
1841 | Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); | ||||||||
1842 | LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); | ||||||||
1843 | Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); | ||||||||
1844 | Instruction *InsertPos = LoopInsertPts[i]; | ||||||||
1845 | StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); | ||||||||
1846 | if (UnorderedAtomic) | ||||||||
1847 | NewSI->setOrdering(AtomicOrdering::Unordered); | ||||||||
1848 | NewSI->setAlignment(Align(Alignment)); | ||||||||
1849 | NewSI->setDebugLoc(DL); | ||||||||
1850 | if (AATags) | ||||||||
1851 | NewSI->setAAMetadata(AATags); | ||||||||
1852 | |||||||||
1853 | if (MSSAU) { | ||||||||
1854 | MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; | ||||||||
1855 | MemoryAccess *NewMemAcc; | ||||||||
1856 | if (!MSSAInsertPoint) { | ||||||||
1857 | NewMemAcc = MSSAU->createMemoryAccessInBB( | ||||||||
1858 | NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); | ||||||||
1859 | } else { | ||||||||
1860 | NewMemAcc = | ||||||||
1861 | MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); | ||||||||
1862 | } | ||||||||
1863 | MSSAInsertPts[i] = NewMemAcc; | ||||||||
1864 | MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true); | ||||||||
1865 | // FIXME: true for safety, false may still be correct. | ||||||||
1866 | } | ||||||||
1867 | } | ||||||||
1868 | } | ||||||||
1869 | |||||||||
1870 | void replaceLoadWithValue(LoadInst *LI, Value *V) const override { | ||||||||
1871 | // Update alias analysis. | ||||||||
1872 | if (AST) | ||||||||
1873 | AST->copyValue(LI, V); | ||||||||
1874 | } | ||||||||
1875 | void instructionDeleted(Instruction *I) const override { | ||||||||
1876 | SafetyInfo.removeInstruction(I); | ||||||||
1877 | if (AST) | ||||||||
1878 | AST->deleteValue(I); | ||||||||
1879 | if (MSSAU) | ||||||||
1880 | MSSAU->removeMemoryAccess(I); | ||||||||
1881 | } | ||||||||
1882 | }; | ||||||||
1883 | |||||||||
1884 | |||||||||
1885 | /// Return true iff we can prove that a caller of this function can not inspect | ||||||||
1886 | /// the contents of the provided object in a well defined program. | ||||||||
1887 | bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { | ||||||||
1888 | if (isa<AllocaInst>(Object)) | ||||||||
1889 | // Since the alloca goes out of scope, we know the caller can't retain a | ||||||||
1890 | // reference to it and be well defined. Thus, we don't need to check for | ||||||||
1891 | // capture. | ||||||||
1892 | return true; | ||||||||
1893 | |||||||||
1894 | // For all other objects we need to know that the caller can't possibly | ||||||||
1895 | // have gotten a reference to the object. There are two components of | ||||||||
1896 | // that: | ||||||||
1897 | // 1) Object can't be escaped by this function. This is what | ||||||||
1898 | // PointerMayBeCaptured checks. | ||||||||
1899 | // 2) Object can't have been captured at definition site. For this, we | ||||||||
1900 | // need to know the return value is noalias. At the moment, we use a | ||||||||
1901 | // weaker condition and handle only AllocLikeFunctions (which are | ||||||||
1902 | // known to be noalias). TODO | ||||||||
1903 | return isAllocLikeFn(Object, TLI) && | ||||||||
1904 | !PointerMayBeCaptured(Object, true, true); | ||||||||
1905 | } | ||||||||
1906 | |||||||||
1907 | } // namespace | ||||||||
1908 | |||||||||
1909 | /// Try to promote memory values to scalars by sinking stores out of the | ||||||||
1910 | /// loop and moving loads to before the loop. We do this by looping over | ||||||||
1911 | /// the stores in the loop, looking for stores to Must pointers which are | ||||||||
1912 | /// loop invariant. | ||||||||
1913 | /// | ||||||||
1914 | bool llvm::promoteLoopAccessesToScalars( | ||||||||
1915 | const SmallSetVector<Value *, 8> &PointerMustAliases, | ||||||||
1916 | SmallVectorImpl<BasicBlock *> &ExitBlocks, | ||||||||
1917 | SmallVectorImpl<Instruction *> &InsertPts, | ||||||||
1918 | SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC, | ||||||||
1919 | LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, | ||||||||
1920 | Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, | ||||||||
1921 | ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { | ||||||||
1922 | // Verify inputs. | ||||||||
1923 | assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&((LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars" ) ? static_cast<void> (0) : __assert_fail ("LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected Input to promoteLoopAccessesToScalars\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1925, __PRETTY_FUNCTION__)) | ||||||||
1924 | SafetyInfo != nullptr &&((LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars" ) ? static_cast<void> (0) : __assert_fail ("LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected Input to promoteLoopAccessesToScalars\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1925, __PRETTY_FUNCTION__)) | ||||||||
1925 | "Unexpected Input to promoteLoopAccessesToScalars")((LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars" ) ? static_cast<void> (0) : __assert_fail ("LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && \"Unexpected Input to promoteLoopAccessesToScalars\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 1925, __PRETTY_FUNCTION__)); | ||||||||
1926 | |||||||||
1927 | Value *SomePtr = *PointerMustAliases.begin(); | ||||||||
1928 | BasicBlock *Preheader = CurLoop->getLoopPreheader(); | ||||||||
1929 | |||||||||
1930 | // It is not safe to promote a load/store from the loop if the load/store is | ||||||||
1931 | // conditional. For example, turning: | ||||||||
1932 | // | ||||||||
1933 | // for () { if (c) *P += 1; } | ||||||||
1934 | // | ||||||||
1935 | // into: | ||||||||
1936 | // | ||||||||
1937 | // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; | ||||||||
1938 | // | ||||||||
1939 | // is not safe, because *P may only be valid to access if 'c' is true. | ||||||||
1940 | // | ||||||||
1941 | // The safety property divides into two parts: | ||||||||
1942 | // p1) The memory may not be dereferenceable on entry to the loop. In this | ||||||||
1943 | // case, we can't insert the required load in the preheader. | ||||||||
1944 | // p2) The memory model does not allow us to insert a store along any dynamic | ||||||||
1945 | // path which did not originally have one. | ||||||||
1946 | // | ||||||||
1947 | // If at least one store is guaranteed to execute, both properties are | ||||||||
1948 | // satisfied, and promotion is legal. | ||||||||
1949 | // | ||||||||
1950 | // This, however, is not a necessary condition. Even if no store/load is | ||||||||
1951 | // guaranteed to execute, we can still establish these properties. | ||||||||
1952 | // We can establish (p1) by proving that hoisting the load into the preheader | ||||||||
1953 | // is safe (i.e. proving dereferenceability on all paths through the loop). We | ||||||||
1954 | // can use any access within the alias set to prove dereferenceability, | ||||||||
1955 | // since they're all must alias. | ||||||||
1956 | // | ||||||||
1957 | // There are two ways establish (p2): | ||||||||
1958 | // a) Prove the location is thread-local. In this case the memory model | ||||||||
1959 | // requirement does not apply, and stores are safe to insert. | ||||||||
1960 | // b) Prove a store dominates every exit block. In this case, if an exit | ||||||||
1961 | // blocks is reached, the original dynamic path would have taken us through | ||||||||
1962 | // the store, so inserting a store into the exit block is safe. Note that this | ||||||||
1963 | // is different from the store being guaranteed to execute. For instance, | ||||||||
1964 | // if an exception is thrown on the first iteration of the loop, the original | ||||||||
1965 | // store is never executed, but the exit blocks are not executed either. | ||||||||
1966 | |||||||||
1967 | bool DereferenceableInPH = false; | ||||||||
1968 | bool SafeToInsertStore = false; | ||||||||
1969 | |||||||||
1970 | SmallVector<Instruction *, 64> LoopUses; | ||||||||
1971 | |||||||||
1972 | // We start with an alignment of one and try to find instructions that allow | ||||||||
1973 | // us to prove better alignment. | ||||||||
1974 | Align Alignment; | ||||||||
1975 | // Keep track of which types of access we see | ||||||||
1976 | bool SawUnorderedAtomic = false; | ||||||||
1977 | bool SawNotAtomic = false; | ||||||||
1978 | AAMDNodes AATags; | ||||||||
1979 | |||||||||
1980 | const DataLayout &MDL = Preheader->getModule()->getDataLayout(); | ||||||||
1981 | |||||||||
1982 | bool IsKnownThreadLocalObject = false; | ||||||||
1983 | if (SafetyInfo->anyBlockMayThrow()) { | ||||||||
1984 | // If a loop can throw, we have to insert a store along each unwind edge. | ||||||||
1985 | // That said, we can't actually make the unwind edge explicit. Therefore, | ||||||||
1986 | // we have to prove that the store is dead along the unwind edge. We do | ||||||||
1987 | // this by proving that the caller can't have a reference to the object | ||||||||
1988 | // after return and thus can't possibly load from the object. | ||||||||
1989 | Value *Object = getUnderlyingObject(SomePtr); | ||||||||
1990 | if (!isKnownNonEscaping(Object, TLI)) | ||||||||
1991 | return false; | ||||||||
1992 | // Subtlety: Alloca's aren't visible to callers, but *are* potentially | ||||||||
1993 | // visible to other threads if captured and used during their lifetimes. | ||||||||
1994 | IsKnownThreadLocalObject = !isa<AllocaInst>(Object); | ||||||||
1995 | } | ||||||||
1996 | |||||||||
1997 | // Check that all of the pointers in the alias set have the same type. We | ||||||||
1998 | // cannot (yet) promote a memory location that is loaded and stored in | ||||||||
1999 | // different sizes. While we are at it, collect alignment and AA info. | ||||||||
2000 | for (Value *ASIV : PointerMustAliases) { | ||||||||
2001 | // Check that all of the pointers in the alias set have the same type. We | ||||||||
2002 | // cannot (yet) promote a memory location that is loaded and stored in | ||||||||
2003 | // different sizes. | ||||||||
2004 | if (SomePtr->getType() != ASIV->getType()) | ||||||||
2005 | return false; | ||||||||
2006 | |||||||||
2007 | for (User *U : ASIV->users()) { | ||||||||
2008 | // Ignore instructions that are outside the loop. | ||||||||
2009 | Instruction *UI = dyn_cast<Instruction>(U); | ||||||||
2010 | if (!UI || !CurLoop->contains(UI)) | ||||||||
2011 | continue; | ||||||||
2012 | |||||||||
2013 | // If there is an non-load/store instruction in the loop, we can't promote | ||||||||
2014 | // it. | ||||||||
2015 | if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { | ||||||||
2016 | if (!Load->isUnordered()) | ||||||||
2017 | return false; | ||||||||
2018 | |||||||||
2019 | SawUnorderedAtomic |= Load->isAtomic(); | ||||||||
2020 | SawNotAtomic |= !Load->isAtomic(); | ||||||||
2021 | |||||||||
2022 | Align InstAlignment = Load->getAlign(); | ||||||||
2023 | |||||||||
2024 | // Note that proving a load safe to speculate requires proving | ||||||||
2025 | // sufficient alignment at the target location. Proving it guaranteed | ||||||||
2026 | // to execute does as well. Thus we can increase our guaranteed | ||||||||
2027 | // alignment as well. | ||||||||
2028 | if (!DereferenceableInPH || (InstAlignment > Alignment)) | ||||||||
2029 | if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo, | ||||||||
2030 | ORE, Preheader->getTerminator())) { | ||||||||
2031 | DereferenceableInPH = true; | ||||||||
2032 | Alignment = std::max(Alignment, InstAlignment); | ||||||||
2033 | } | ||||||||
2034 | } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { | ||||||||
2035 | // Stores *of* the pointer are not interesting, only stores *to* the | ||||||||
2036 | // pointer. | ||||||||
2037 | if (UI->getOperand(1) != ASIV) | ||||||||
2038 | continue; | ||||||||
2039 | if (!Store->isUnordered()) | ||||||||
2040 | return false; | ||||||||
2041 | |||||||||
2042 | SawUnorderedAtomic |= Store->isAtomic(); | ||||||||
2043 | SawNotAtomic |= !Store->isAtomic(); | ||||||||
2044 | |||||||||
2045 | // If the store is guaranteed to execute, both properties are satisfied. | ||||||||
2046 | // We may want to check if a store is guaranteed to execute even if we | ||||||||
2047 | // already know that promotion is safe, since it may have higher | ||||||||
2048 | // alignment than any other guaranteed stores, in which case we can | ||||||||
2049 | // raise the alignment on the promoted store. | ||||||||
2050 | Align InstAlignment = Store->getAlign(); | ||||||||
2051 | |||||||||
2052 | if (!DereferenceableInPH || !SafeToInsertStore || | ||||||||
2053 | (InstAlignment > Alignment)) { | ||||||||
2054 | if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) { | ||||||||
2055 | DereferenceableInPH = true; | ||||||||
2056 | SafeToInsertStore = true; | ||||||||
2057 | Alignment = std::max(Alignment, InstAlignment); | ||||||||
2058 | } | ||||||||
2059 | } | ||||||||
2060 | |||||||||
2061 | // If a store dominates all exit blocks, it is safe to sink. | ||||||||
2062 | // As explained above, if an exit block was executed, a dominating | ||||||||
2063 | // store must have been executed at least once, so we are not | ||||||||
2064 | // introducing stores on paths that did not have them. | ||||||||
2065 | // Note that this only looks at explicit exit blocks. If we ever | ||||||||
2066 | // start sinking stores into unwind edges (see above), this will break. | ||||||||
2067 | if (!SafeToInsertStore) | ||||||||
2068 | SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { | ||||||||
2069 | return DT->dominates(Store->getParent(), Exit); | ||||||||
2070 | }); | ||||||||
2071 | |||||||||
2072 | // If the store is not guaranteed to execute, we may still get | ||||||||
2073 | // deref info through it. | ||||||||
2074 | if (!DereferenceableInPH) { | ||||||||
2075 | DereferenceableInPH = isDereferenceableAndAlignedPointer( | ||||||||
2076 | Store->getPointerOperand(), Store->getValueOperand()->getType(), | ||||||||
2077 | Store->getAlign(), MDL, Preheader->getTerminator(), DT); | ||||||||
2078 | } | ||||||||
2079 | } else | ||||||||
2080 | return false; // Not a load or store. | ||||||||
2081 | |||||||||
2082 | // Merge the AA tags. | ||||||||
2083 | if (LoopUses.empty()) { | ||||||||
2084 | // On the first load/store, just take its AA tags. | ||||||||
2085 | UI->getAAMetadata(AATags); | ||||||||
2086 | } else if (AATags) { | ||||||||
2087 | UI->getAAMetadata(AATags, /* Merge = */ true); | ||||||||
2088 | } | ||||||||
2089 | |||||||||
2090 | LoopUses.push_back(UI); | ||||||||
2091 | } | ||||||||
2092 | } | ||||||||
2093 | |||||||||
2094 | // If we found both an unordered atomic instruction and a non-atomic memory | ||||||||
2095 | // access, bail. We can't blindly promote non-atomic to atomic since we | ||||||||
2096 | // might not be able to lower the result. We can't downgrade since that | ||||||||
2097 | // would violate memory model. Also, align 0 is an error for atomics. | ||||||||
2098 | if (SawUnorderedAtomic && SawNotAtomic) | ||||||||
2099 | return false; | ||||||||
2100 | |||||||||
2101 | // If we're inserting an atomic load in the preheader, we must be able to | ||||||||
2102 | // lower it. We're only guaranteed to be able to lower naturally aligned | ||||||||
2103 | // atomics. | ||||||||
2104 | auto *SomePtrElemType = SomePtr->getType()->getPointerElementType(); | ||||||||
2105 | if (SawUnorderedAtomic && | ||||||||
2106 | Alignment < MDL.getTypeStoreSize(SomePtrElemType)) | ||||||||
2107 | return false; | ||||||||
2108 | |||||||||
2109 | // If we couldn't prove we can hoist the load, bail. | ||||||||
2110 | if (!DereferenceableInPH) | ||||||||
2111 | return false; | ||||||||
2112 | |||||||||
2113 | // We know we can hoist the load, but don't have a guaranteed store. | ||||||||
2114 | // Check whether the location is thread-local. If it is, then we can insert | ||||||||
2115 | // stores along paths which originally didn't have them without violating the | ||||||||
2116 | // memory model. | ||||||||
2117 | if (!SafeToInsertStore) { | ||||||||
2118 | if (IsKnownThreadLocalObject) | ||||||||
2119 | SafeToInsertStore = true; | ||||||||
2120 | else { | ||||||||
2121 | Value *Object = getUnderlyingObject(SomePtr); | ||||||||
2122 | SafeToInsertStore = | ||||||||
2123 | (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && | ||||||||
2124 | !PointerMayBeCaptured(Object, true, true); | ||||||||
2125 | } | ||||||||
2126 | } | ||||||||
2127 | |||||||||
2128 | // If we've still failed to prove we can sink the store, give up. | ||||||||
2129 | if (!SafeToInsertStore) | ||||||||
2130 | return false; | ||||||||
2131 | |||||||||
2132 | // Otherwise, this is safe to promote, lets do it! | ||||||||
2133 | LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtrdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'; } } while (false) | ||||||||
2134 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'; } } while (false); | ||||||||
2135 | ORE->emit([&]() { | ||||||||
2136 | return OptimizationRemark(DEBUG_TYPE"licm", "PromoteLoopAccessesToScalar", | ||||||||
2137 | LoopUses[0]) | ||||||||
2138 | << "Moving accesses to memory location out of the loop"; | ||||||||
2139 | }); | ||||||||
2140 | ++NumPromoted; | ||||||||
2141 | |||||||||
2142 | // Look at all the loop uses, and try to merge their locations. | ||||||||
2143 | std::vector<const DILocation *> LoopUsesLocs; | ||||||||
2144 | for (auto U : LoopUses) | ||||||||
2145 | LoopUsesLocs.push_back(U->getDebugLoc().get()); | ||||||||
2146 | auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs)); | ||||||||
2147 | |||||||||
2148 | // We use the SSAUpdater interface to insert phi nodes as required. | ||||||||
2149 | SmallVector<PHINode *, 16> NewPHIs; | ||||||||
2150 | SSAUpdater SSA(&NewPHIs); | ||||||||
2151 | LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, | ||||||||
2152 | InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL, | ||||||||
2153 | Alignment.value(), SawUnorderedAtomic, AATags, | ||||||||
2154 | *SafetyInfo); | ||||||||
2155 | |||||||||
2156 | // Set up the preheader to have a definition of the value. It is the live-out | ||||||||
2157 | // value from the preheader that uses in the loop will use. | ||||||||
2158 | LoadInst *PreheaderLoad = new LoadInst( | ||||||||
2159 | SomePtr->getType()->getPointerElementType(), SomePtr, | ||||||||
2160 | SomePtr->getName() + ".promoted", Preheader->getTerminator()); | ||||||||
2161 | if (SawUnorderedAtomic) | ||||||||
2162 | PreheaderLoad->setOrdering(AtomicOrdering::Unordered); | ||||||||
2163 | PreheaderLoad->setAlignment(Alignment); | ||||||||
2164 | PreheaderLoad->setDebugLoc(DebugLoc()); | ||||||||
2165 | if (AATags) | ||||||||
2166 | PreheaderLoad->setAAMetadata(AATags); | ||||||||
2167 | SSA.AddAvailableValue(Preheader, PreheaderLoad); | ||||||||
2168 | |||||||||
2169 | if (MSSAU) { | ||||||||
2170 | MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( | ||||||||
2171 | PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); | ||||||||
2172 | MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess); | ||||||||
2173 | MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); | ||||||||
2174 | } | ||||||||
2175 | |||||||||
2176 | if (MSSAU && VerifyMemorySSA) | ||||||||
2177 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||||
2178 | // Rewrite all the loads in the loop and remember all the definitions from | ||||||||
2179 | // stores in the loop. | ||||||||
2180 | Promoter.run(LoopUses); | ||||||||
2181 | |||||||||
2182 | if (MSSAU && VerifyMemorySSA) | ||||||||
2183 | MSSAU->getMemorySSA()->verifyMemorySSA(); | ||||||||
2184 | // If the SSAUpdater didn't use the load in the preheader, just zap it now. | ||||||||
2185 | if (PreheaderLoad->use_empty()) | ||||||||
2186 | eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU); | ||||||||
2187 | |||||||||
2188 | return true; | ||||||||
2189 | } | ||||||||
2190 | |||||||||
2191 | /// Returns an owning pointer to an alias set which incorporates aliasing info | ||||||||
2192 | /// from L and all subloops of L. | ||||||||
2193 | std::unique_ptr<AliasSetTracker> | ||||||||
2194 | LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, | ||||||||
2195 | AAResults *AA) { | ||||||||
2196 | auto CurAST = std::make_unique<AliasSetTracker>(*AA); | ||||||||
2197 | |||||||||
2198 | // Add everything from all the sub loops. | ||||||||
2199 | for (Loop *InnerL : L->getSubLoops()) | ||||||||
2200 | for (BasicBlock *BB : InnerL->blocks()) | ||||||||
2201 | CurAST->add(*BB); | ||||||||
2202 | |||||||||
2203 | // And merge in this loop (without anything from inner loops). | ||||||||
2204 | for (BasicBlock *BB : L->blocks()) | ||||||||
2205 | if (LI->getLoopFor(BB) == L) | ||||||||
2206 | CurAST->add(*BB); | ||||||||
2207 | |||||||||
2208 | return CurAST; | ||||||||
2209 | } | ||||||||
2210 | |||||||||
2211 | std::unique_ptr<AliasSetTracker> | ||||||||
2212 | LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA( | ||||||||
2213 | Loop *L, AAResults *AA, MemorySSAUpdater *MSSAU) { | ||||||||
2214 | auto *MSSA = MSSAU->getMemorySSA(); | ||||||||
2215 | auto CurAST = std::make_unique<AliasSetTracker>(*AA, MSSA, L); | ||||||||
2216 | CurAST->addAllInstructionsInLoopUsingMSSA(); | ||||||||
2217 | return CurAST; | ||||||||
2218 | } | ||||||||
2219 | |||||||||
2220 | static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, | ||||||||
2221 | AliasSetTracker *CurAST, Loop *CurLoop, | ||||||||
2222 | AAResults *AA) { | ||||||||
2223 | // First check to see if any of the basic blocks in CurLoop invalidate *V. | ||||||||
2224 | bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod(); | ||||||||
2225 | |||||||||
2226 | if (!isInvalidatedAccordingToAST || !LICMN2Theshold) | ||||||||
2227 | return isInvalidatedAccordingToAST; | ||||||||
2228 | |||||||||
2229 | // Check with a diagnostic analysis if we can refine the information above. | ||||||||
2230 | // This is to identify the limitations of using the AST. | ||||||||
2231 | // The alias set mechanism used by LICM has a major weakness in that it | ||||||||
2232 | // combines all things which may alias into a single set *before* asking | ||||||||
2233 | // modref questions. As a result, a single readonly call within a loop will | ||||||||
2234 | // collapse all loads and stores into a single alias set and report | ||||||||
2235 | // invalidation if the loop contains any store. For example, readonly calls | ||||||||
2236 | // with deopt states have this form and create a general alias set with all | ||||||||
2237 | // loads and stores. In order to get any LICM in loops containing possible | ||||||||
2238 | // deopt states we need a more precise invalidation of checking the mod ref | ||||||||
2239 | // info of each instruction within the loop and LI. This has a complexity of | ||||||||
2240 | // O(N^2), so currently, it is used only as a diagnostic tool since the | ||||||||
2241 | // default value of LICMN2Threshold is zero. | ||||||||
2242 | |||||||||
2243 | // Don't look at nested loops. | ||||||||
2244 | if (CurLoop->begin() != CurLoop->end()) | ||||||||
2245 | return true; | ||||||||
2246 | |||||||||
2247 | int N = 0; | ||||||||
2248 | for (BasicBlock *BB : CurLoop->getBlocks()) | ||||||||
2249 | for (Instruction &I : *BB) { | ||||||||
2250 | if (N >= LICMN2Theshold) { | ||||||||
2251 | LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Alasing N2 threshold exhausted for " << *(MemLoc.Ptr) << "\n"; } } while (false) | ||||||||
2252 | << *(MemLoc.Ptr) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Alasing N2 threshold exhausted for " << *(MemLoc.Ptr) << "\n"; } } while (false); | ||||||||
2253 | return true; | ||||||||
2254 | } | ||||||||
2255 | N++; | ||||||||
2256 | auto Res = AA->getModRefInfo(&I, MemLoc); | ||||||||
2257 | if (isModSet(Res)) { | ||||||||
2258 | LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Aliasing failed on " << I << " for " << *(MemLoc.Ptr) << "\n"; } } while (false ) | ||||||||
2259 | << *(MemLoc.Ptr) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Aliasing failed on " << I << " for " << *(MemLoc.Ptr) << "\n"; } } while (false ); | ||||||||
2260 | return true; | ||||||||
2261 | } | ||||||||
2262 | } | ||||||||
2263 | LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("licm")) { dbgs() << "Aliasing okay for " << *(MemLoc .Ptr) << "\n"; } } while (false); | ||||||||
2264 | return false; | ||||||||
2265 | } | ||||||||
2266 | |||||||||
2267 | static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, | ||||||||
2268 | Loop *CurLoop, | ||||||||
2269 | SinkAndHoistLICMFlags &Flags) { | ||||||||
2270 | // For hoisting, use the walker to determine safety | ||||||||
2271 | if (!Flags.IsSink) { | ||||||||
2272 | MemoryAccess *Source; | ||||||||
2273 | // See declaration of SetLicmMssaOptCap for usage details. | ||||||||
2274 | if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap) | ||||||||
2275 | Source = MU->getDefiningAccess(); | ||||||||
2276 | else { | ||||||||
2277 | Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); | ||||||||
2278 | Flags.LicmMssaOptCounter++; | ||||||||
2279 | } | ||||||||
2280 | return !MSSA->isLiveOnEntryDef(Source) && | ||||||||
2281 | CurLoop->contains(Source->getBlock()); | ||||||||
2282 | } | ||||||||
2283 | |||||||||
2284 | // For sinking, we'd need to check all Defs below this use. The getClobbering | ||||||||
2285 | // call will look on the backedge of the loop, but will check aliasing with | ||||||||
2286 | // the instructions on the previous iteration. | ||||||||
2287 | // For example: | ||||||||
2288 | // for (i ... ) | ||||||||
2289 | // load a[i] ( Use (LoE) | ||||||||
2290 | // store a[i] ( 1 = Def (2), with 2 = Phi for the loop. | ||||||||
2291 | // i++; | ||||||||
2292 | // The load sees no clobbering inside the loop, as the backedge alias check | ||||||||
2293 | // does phi translation, and will check aliasing against store a[i-1]. | ||||||||
2294 | // However sinking the load outside the loop, below the store is incorrect. | ||||||||
2295 | |||||||||
2296 | // For now, only sink if there are no Defs in the loop, and the existing ones | ||||||||
2297 | // precede the use and are in the same block. | ||||||||
2298 | // FIXME: Increase precision: Safe to sink if Use post dominates the Def; | ||||||||
2299 | // needs PostDominatorTreeAnalysis. | ||||||||
2300 | // FIXME: More precise: no Defs that alias this Use. | ||||||||
2301 | if (Flags.NoOfMemAccTooLarge) | ||||||||
2302 | return true; | ||||||||
2303 | for (auto *BB : CurLoop->getBlocks()) | ||||||||
2304 | if (auto *Accesses = MSSA->getBlockDefs(BB)) | ||||||||
2305 | for (const auto &MA : *Accesses) | ||||||||
2306 | if (const auto *MD = dyn_cast<MemoryDef>(&MA)) | ||||||||
2307 | if (MU->getBlock() != MD->getBlock() || | ||||||||
2308 | !MSSA->locallyDominates(MD, MU)) | ||||||||
2309 | return true; | ||||||||
2310 | return false; | ||||||||
2311 | } | ||||||||
2312 | |||||||||
2313 | /// Little predicate that returns true if the specified basic block is in | ||||||||
2314 | /// a subloop of the current one, not the current one itself. | ||||||||
2315 | /// | ||||||||
2316 | static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { | ||||||||
2317 | assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop")((CurLoop->contains(BB) && "Only valid if BB is IN the loop" ) ? static_cast<void> (0) : __assert_fail ("CurLoop->contains(BB) && \"Only valid if BB is IN the loop\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/lib/Transforms/Scalar/LICM.cpp" , 2317, __PRETTY_FUNCTION__)); | ||||||||
2318 | return LI->getLoopFor(BB) != CurLoop; | ||||||||
2319 | } |
1 | //===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===// | ||||||||||||||||
2 | // | ||||||||||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||||||||
6 | // | ||||||||||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||||||||||
8 | // | ||||||||||||||||
9 | // This file provides a simple and efficient mechanism for performing general | ||||||||||||||||
10 | // tree-based pattern matches on the LLVM IR. The power of these routines is | ||||||||||||||||
11 | // that it allows you to write concise patterns that are expressive and easy to | ||||||||||||||||
12 | // understand. The other major advantage of this is that it allows you to | ||||||||||||||||
13 | // trivially capture/bind elements in the pattern to variables. For example, | ||||||||||||||||
14 | // you can do something like this: | ||||||||||||||||
15 | // | ||||||||||||||||
16 | // Value *Exp = ... | ||||||||||||||||
17 | // Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2) | ||||||||||||||||
18 | // if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)), | ||||||||||||||||
19 | // m_And(m_Value(Y), m_ConstantInt(C2))))) { | ||||||||||||||||
20 | // ... Pattern is matched and variables are bound ... | ||||||||||||||||
21 | // } | ||||||||||||||||
22 | // | ||||||||||||||||
23 | // This is primarily useful to things like the instruction combiner, but can | ||||||||||||||||
24 | // also be useful for static analysis tools or code generators. | ||||||||||||||||
25 | // | ||||||||||||||||
26 | //===----------------------------------------------------------------------===// | ||||||||||||||||
27 | |||||||||||||||||
28 | #ifndef LLVM_IR_PATTERNMATCH_H | ||||||||||||||||
29 | #define LLVM_IR_PATTERNMATCH_H | ||||||||||||||||
30 | |||||||||||||||||
31 | #include "llvm/ADT/APFloat.h" | ||||||||||||||||
32 | #include "llvm/ADT/APInt.h" | ||||||||||||||||
33 | #include "llvm/IR/Constant.h" | ||||||||||||||||
34 | #include "llvm/IR/Constants.h" | ||||||||||||||||
35 | #include "llvm/IR/DataLayout.h" | ||||||||||||||||
36 | #include "llvm/IR/InstrTypes.h" | ||||||||||||||||
37 | #include "llvm/IR/Instruction.h" | ||||||||||||||||
38 | #include "llvm/IR/Instructions.h" | ||||||||||||||||
39 | #include "llvm/IR/IntrinsicInst.h" | ||||||||||||||||
40 | #include "llvm/IR/Intrinsics.h" | ||||||||||||||||
41 | #include "llvm/IR/Operator.h" | ||||||||||||||||
42 | #include "llvm/IR/Value.h" | ||||||||||||||||
43 | #include "llvm/Support/Casting.h" | ||||||||||||||||
44 | #include <cstdint> | ||||||||||||||||
45 | |||||||||||||||||
46 | namespace llvm { | ||||||||||||||||
47 | namespace PatternMatch { | ||||||||||||||||
48 | |||||||||||||||||
/// Runs pattern \p P against value \p V and returns whether it matched.
/// The pattern is accepted by const reference (so temporaries can be passed)
/// but its match() member is invoked on a non-const view of it.
template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
  Pattern &Matcher = const_cast<Pattern &>(P);
  return Matcher.match(V);
}
52 | |||||||||||||||||
53 | template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) { | ||||||||||||||||
54 | return const_cast<Pattern &>(P).match(Mask); | ||||||||||||||||
55 | } | ||||||||||||||||
56 | |||||||||||||||||
/// Matcher wrapper that succeeds only when the value has exactly one use
/// (per hasOneUse()) and the wrapped sub-pattern also matches it.
template <typename SubPattern_t> struct OneUse_match {
  SubPattern_t SubPattern;

  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}

  template <typename OpTy> bool match(OpTy *V) {
    // The use-count test comes first; the sub-pattern is not consulted for
    // values that fail it.
    if (!V->hasOneUse())
      return false;
    return SubPattern.match(V);
  }
};
66 | |||||||||||||||||
67 | template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) { | ||||||||||||||||
68 | return SubPattern; | ||||||||||||||||
69 | } | ||||||||||||||||
70 | |||||||||||||||||
71 | template <typename Class> struct class_match { | ||||||||||||||||
72 | template <typename ITy> bool match(ITy *V) { return isa<Class>(V); } | ||||||||||||||||
73 | }; | ||||||||||||||||
74 | |||||||||||||||||
75 | /// Match an arbitrary value and ignore it. | ||||||||||||||||
76 | inline class_match<Value> m_Value() { return class_match<Value>(); } | ||||||||||||||||
77 | |||||||||||||||||
78 | /// Match an arbitrary unary operation and ignore it. | ||||||||||||||||
79 | inline class_match<UnaryOperator> m_UnOp() { | ||||||||||||||||
80 | return class_match<UnaryOperator>(); | ||||||||||||||||
81 | } | ||||||||||||||||
82 | |||||||||||||||||
83 | /// Match an arbitrary binary operation and ignore it. | ||||||||||||||||
84 | inline class_match<BinaryOperator> m_BinOp() { | ||||||||||||||||
85 | return class_match<BinaryOperator>(); | ||||||||||||||||
86 | } | ||||||||||||||||
87 | |||||||||||||||||
88 | /// Matches any compare instruction and ignore it. | ||||||||||||||||
89 | inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); } | ||||||||||||||||
90 | |||||||||||||||||
91 | /// Match an arbitrary ConstantInt and ignore it. | ||||||||||||||||
92 | inline class_match<ConstantInt> m_ConstantInt() { | ||||||||||||||||
93 | return class_match<ConstantInt>(); | ||||||||||||||||
94 | } | ||||||||||||||||
95 | |||||||||||||||||
96 | /// Match an arbitrary undef constant. | ||||||||||||||||
97 | inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); } | ||||||||||||||||
98 | |||||||||||||||||
99 | /// Match an arbitrary Constant and ignore it. | ||||||||||||||||
100 | inline class_match<Constant> m_Constant() { return class_match<Constant>(); } | ||||||||||||||||
101 | |||||||||||||||||
102 | /// Match an arbitrary basic block value and ignore it. | ||||||||||||||||
103 | inline class_match<BasicBlock> m_BasicBlock() { | ||||||||||||||||
104 | return class_match<BasicBlock>(); | ||||||||||||||||
105 | } | ||||||||||||||||
106 | |||||||||||||||||
/// Inverting matcher: holds a copy of another matcher and reports a match
/// exactly when that inner matcher does not.
template <typename Ty> struct match_unless {
  Ty M;

  match_unless(const Ty &Inner) : M(Inner) {}

  template <typename ITy> bool match(ITy *V) {
    const bool InnerMatched = M.match(V);
    return !InnerMatched;
  }
};
115 | |||||||||||||||||
116 | /// Match if the inner matcher does *NOT* match. | ||||||||||||||||
117 | template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) { | ||||||||||||||||
118 | return match_unless<Ty>(M); | ||||||||||||||||
119 | } | ||||||||||||||||
120 | |||||||||||||||||
/// Matching combinators
///
/// Disjunction of two matchers: succeeds when the left matcher matches, or
/// failing that, when the right matcher matches. The right matcher is not
/// tried once the left one has succeeded.
template <typename LTy, typename RTy> struct match_combine_or {
  LTy L;
  RTy R;

  match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}

  template <typename ITy> bool match(ITy *V) {
    return L.match(V) || R.match(V);
  }
};
136 | |||||||||||||||||
/// Conjunction of two matchers: succeeds only when the left matcher matches
/// and then the right matcher matches as well. The right matcher is not
/// tried when the left one has already failed.
template <typename LTy, typename RTy> struct match_combine_and {
  LTy L;
  RTy R;

  match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}

  template <typename ITy> bool match(ITy *V) {
    return L.match(V) && R.match(V);
  }
};
150 | |||||||||||||||||
151 | /// Combine two pattern matchers matching L || R | ||||||||||||||||
152 | template <typename LTy, typename RTy> | ||||||||||||||||
153 | inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) { | ||||||||||||||||
154 | return match_combine_or<LTy, RTy>(L, R); | ||||||||||||||||
155 | } | ||||||||||||||||
156 | |||||||||||||||||
157 | /// Combine two pattern matchers matching L && R | ||||||||||||||||
158 | template <typename LTy, typename RTy> | ||||||||||||||||
159 | inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) { | ||||||||||||||||
160 | return match_combine_and<LTy, RTy>(L, R); | ||||||||||||||||
161 | } | ||||||||||||||||
162 | |||||||||||||||||
163 | struct apint_match { | ||||||||||||||||
164 | const APInt *&Res; | ||||||||||||||||
165 | bool AllowUndef; | ||||||||||||||||
166 | |||||||||||||||||
167 | apint_match(const APInt *&Res, bool AllowUndef) | ||||||||||||||||
168 | : Res(Res), AllowUndef(AllowUndef) {} | ||||||||||||||||
169 | |||||||||||||||||
170 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
171 | if (auto *CI = dyn_cast<ConstantInt>(V)) { | ||||||||||||||||
172 | Res = &CI->getValue(); | ||||||||||||||||
173 | return true; | ||||||||||||||||
174 | } | ||||||||||||||||
175 | if (V->getType()->isVectorTy()) | ||||||||||||||||
176 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||||||
177 | if (auto *CI = dyn_cast_or_null<ConstantInt>( | ||||||||||||||||
178 | C->getSplatValue(AllowUndef))) { | ||||||||||||||||
179 | Res = &CI->getValue(); | ||||||||||||||||
180 | return true; | ||||||||||||||||
181 | } | ||||||||||||||||
182 | return false; | ||||||||||||||||
183 | } | ||||||||||||||||
184 | }; | ||||||||||||||||
185 | // Either constexpr if or renaming ConstantFP::getValueAPF to | ||||||||||||||||
186 | // ConstantFP::getValue is needed to do it via single template | ||||||||||||||||
187 | // function for both apint/apfloat. | ||||||||||||||||
188 | struct apfloat_match { | ||||||||||||||||
189 | const APFloat *&Res; | ||||||||||||||||
190 | bool AllowUndef; | ||||||||||||||||
191 | |||||||||||||||||
192 | apfloat_match(const APFloat *&Res, bool AllowUndef) | ||||||||||||||||
193 | : Res(Res), AllowUndef(AllowUndef) {} | ||||||||||||||||
194 | |||||||||||||||||
195 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
196 | if (auto *CI = dyn_cast<ConstantFP>(V)) { | ||||||||||||||||
197 | Res = &CI->getValueAPF(); | ||||||||||||||||
198 | return true; | ||||||||||||||||
199 | } | ||||||||||||||||
200 | if (V->getType()->isVectorTy()) | ||||||||||||||||
201 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||||||
202 | if (auto *CI = dyn_cast_or_null<ConstantFP>( | ||||||||||||||||
203 | C->getSplatValue(AllowUndef))) { | ||||||||||||||||
204 | Res = &CI->getValueAPF(); | ||||||||||||||||
205 | return true; | ||||||||||||||||
206 | } | ||||||||||||||||
207 | return false; | ||||||||||||||||
208 | } | ||||||||||||||||
209 | }; | ||||||||||||||||
210 | |||||||||||||||||
211 | /// Match a ConstantInt or splatted ConstantVector, binding the | ||||||||||||||||
212 | /// specified pointer to the contained APInt. | ||||||||||||||||
213 | inline apint_match m_APInt(const APInt *&Res) { | ||||||||||||||||
214 | // Forbid undefs by default to maintain previous behavior. | ||||||||||||||||
215 | return apint_match(Res, /* AllowUndef */ false); | ||||||||||||||||
216 | } | ||||||||||||||||
217 | |||||||||||||||||
218 | /// Match APInt while allowing undefs in splat vector constants. | ||||||||||||||||
219 | inline apint_match m_APIntAllowUndef(const APInt *&Res) { | ||||||||||||||||
220 | return apint_match(Res, /* AllowUndef */ true); | ||||||||||||||||
221 | } | ||||||||||||||||
222 | |||||||||||||||||
223 | /// Match APInt while forbidding undefs in splat vector constants. | ||||||||||||||||
224 | inline apint_match m_APIntForbidUndef(const APInt *&Res) { | ||||||||||||||||
225 | return apint_match(Res, /* AllowUndef */ false); | ||||||||||||||||
226 | } | ||||||||||||||||
227 | |||||||||||||||||
228 | /// Match a ConstantFP or splatted ConstantVector, binding the | ||||||||||||||||
229 | /// specified pointer to the contained APFloat. | ||||||||||||||||
230 | inline apfloat_match m_APFloat(const APFloat *&Res) { | ||||||||||||||||
231 | // Forbid undefs by default to maintain previous behavior. | ||||||||||||||||
232 | return apfloat_match(Res, /* AllowUndef */ false); | ||||||||||||||||
233 | } | ||||||||||||||||
234 | |||||||||||||||||
235 | /// Match APFloat while allowing undefs in splat vector constants. | ||||||||||||||||
236 | inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) { | ||||||||||||||||
237 | return apfloat_match(Res, /* AllowUndef */ true); | ||||||||||||||||
238 | } | ||||||||||||||||
239 | |||||||||||||||||
240 | /// Match APFloat while forbidding undefs in splat vector constants. | ||||||||||||||||
241 | inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) { | ||||||||||||||||
242 | return apfloat_match(Res, /* AllowUndef */ false); | ||||||||||||||||
243 | } | ||||||||||||||||
244 | |||||||||||||||||
245 | template <int64_t Val> struct constantint_match { | ||||||||||||||||
246 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
247 | if (const auto *CI = dyn_cast<ConstantInt>(V)) { | ||||||||||||||||
248 | const APInt &CIV = CI->getValue(); | ||||||||||||||||
249 | if (Val >= 0) | ||||||||||||||||
250 | return CIV == static_cast<uint64_t>(Val); | ||||||||||||||||
251 | // If Val is negative, and CI is shorter than it, truncate to the right | ||||||||||||||||
252 | // number of bits. If it is larger, then we have to sign extend. Just | ||||||||||||||||
253 | // compare their negated values. | ||||||||||||||||
254 | return -CIV == -Val; | ||||||||||||||||
255 | } | ||||||||||||||||
256 | return false; | ||||||||||||||||
257 | } | ||||||||||||||||
258 | }; | ||||||||||||||||
259 | |||||||||||||||||
260 | /// Match a ConstantInt with a specific value. | ||||||||||||||||
261 | template <int64_t Val> inline constantint_match<Val> m_ConstantInt() { | ||||||||||||||||
262 | return constantint_match<Val>(); | ||||||||||||||||
263 | } | ||||||||||||||||
264 | |||||||||||||||||
265 | /// This helper class is used to match constant scalars, vector splats, | ||||||||||||||||
266 | /// and fixed width vectors that satisfy a specified predicate. | ||||||||||||||||
267 | /// For fixed width vector constants, undefined elements are ignored. | ||||||||||||||||
268 | template <typename Predicate, typename ConstantVal> | ||||||||||||||||
269 | struct cstval_pred_ty : public Predicate { | ||||||||||||||||
270 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
271 | if (const auto *CV = dyn_cast<ConstantVal>(V)) | ||||||||||||||||
272 | return this->isValue(CV->getValue()); | ||||||||||||||||
273 | if (const auto *VTy = dyn_cast<VectorType>(V->getType())) { | ||||||||||||||||
274 | if (const auto *C = dyn_cast<Constant>(V)) { | ||||||||||||||||
275 | if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue())) | ||||||||||||||||
276 | return this->isValue(CV->getValue()); | ||||||||||||||||
277 | |||||||||||||||||
278 | // Number of elements of a scalable vector unknown at compile time | ||||||||||||||||
279 | auto *FVTy = dyn_cast<FixedVectorType>(VTy); | ||||||||||||||||
280 | if (!FVTy) | ||||||||||||||||
281 | return false; | ||||||||||||||||
282 | |||||||||||||||||
283 | // Non-splat vector constant: check each element for a match. | ||||||||||||||||
284 | unsigned NumElts = FVTy->getNumElements(); | ||||||||||||||||
285 | assert(NumElts != 0 && "Constant vector with no elements?")((NumElts != 0 && "Constant vector with no elements?" ) ? static_cast<void> (0) : __assert_fail ("NumElts != 0 && \"Constant vector with no elements?\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/PatternMatch.h" , 285, __PRETTY_FUNCTION__)); | ||||||||||||||||
286 | bool HasNonUndefElements = false; | ||||||||||||||||
287 | for (unsigned i = 0; i != NumElts; ++i) { | ||||||||||||||||
288 | Constant *Elt = C->getAggregateElement(i); | ||||||||||||||||
289 | if (!Elt) | ||||||||||||||||
290 | return false; | ||||||||||||||||
291 | if (isa<UndefValue>(Elt)) | ||||||||||||||||
292 | continue; | ||||||||||||||||
293 | auto *CV = dyn_cast<ConstantVal>(Elt); | ||||||||||||||||
294 | if (!CV || !this->isValue(CV->getValue())) | ||||||||||||||||
295 | return false; | ||||||||||||||||
296 | HasNonUndefElements = true; | ||||||||||||||||
297 | } | ||||||||||||||||
298 | return HasNonUndefElements; | ||||||||||||||||
299 | } | ||||||||||||||||
300 | } | ||||||||||||||||
301 | return false; | ||||||||||||||||
302 | } | ||||||||||||||||
303 | }; | ||||||||||||||||
304 | |||||||||||||||||
305 | /// specialization of cstval_pred_ty for ConstantInt | ||||||||||||||||
306 | template <typename Predicate> | ||||||||||||||||
307 | using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>; | ||||||||||||||||
308 | |||||||||||||||||
309 | /// specialization of cstval_pred_ty for ConstantFP | ||||||||||||||||
310 | template <typename Predicate> | ||||||||||||||||
311 | using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>; | ||||||||||||||||
312 | |||||||||||||||||
313 | /// This helper class is used to match scalar and vector constants that | ||||||||||||||||
314 | /// satisfy a specified predicate, and bind them to an APInt. | ||||||||||||||||
315 | template <typename Predicate> struct api_pred_ty : public Predicate { | ||||||||||||||||
316 | const APInt *&Res; | ||||||||||||||||
317 | |||||||||||||||||
318 | api_pred_ty(const APInt *&R) : Res(R) {} | ||||||||||||||||
319 | |||||||||||||||||
320 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
321 | if (const auto *CI = dyn_cast<ConstantInt>(V)) | ||||||||||||||||
322 | if (this->isValue(CI->getValue())) { | ||||||||||||||||
323 | Res = &CI->getValue(); | ||||||||||||||||
324 | return true; | ||||||||||||||||
325 | } | ||||||||||||||||
326 | if (V->getType()->isVectorTy()) | ||||||||||||||||
327 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||||||
328 | if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) | ||||||||||||||||
329 | if (this->isValue(CI->getValue())) { | ||||||||||||||||
330 | Res = &CI->getValue(); | ||||||||||||||||
331 | return true; | ||||||||||||||||
332 | } | ||||||||||||||||
333 | |||||||||||||||||
334 | return false; | ||||||||||||||||
335 | } | ||||||||||||||||
336 | }; | ||||||||||||||||
337 | |||||||||||||||||
338 | /// This helper class is used to match scalar and vector constants that | ||||||||||||||||
339 | /// satisfy a specified predicate, and bind them to an APFloat. | ||||||||||||||||
340 | /// Undefs are allowed in splat vector constants. | ||||||||||||||||
341 | template <typename Predicate> struct apf_pred_ty : public Predicate { | ||||||||||||||||
342 | const APFloat *&Res; | ||||||||||||||||
343 | |||||||||||||||||
344 | apf_pred_ty(const APFloat *&R) : Res(R) {} | ||||||||||||||||
345 | |||||||||||||||||
346 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
347 | if (const auto *CI = dyn_cast<ConstantFP>(V)) | ||||||||||||||||
348 | if (this->isValue(CI->getValue())) { | ||||||||||||||||
349 | Res = &CI->getValue(); | ||||||||||||||||
350 | return true; | ||||||||||||||||
351 | } | ||||||||||||||||
352 | if (V->getType()->isVectorTy()) | ||||||||||||||||
353 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||||||
354 | if (auto *CI = dyn_cast_or_null<ConstantFP>( | ||||||||||||||||
355 | C->getSplatValue(/* AllowUndef */ true))) | ||||||||||||||||
356 | if (this->isValue(CI->getValue())) { | ||||||||||||||||
357 | Res = &CI->getValue(); | ||||||||||||||||
358 | return true; | ||||||||||||||||
359 | } | ||||||||||||||||
360 | |||||||||||||||||
361 | return false; | ||||||||||||||||
362 | } | ||||||||||||||||
363 | }; | ||||||||||||||||
364 | |||||||||||||||||
365 | /////////////////////////////////////////////////////////////////////////////// | ||||||||||||||||
366 | // | ||||||||||||||||
367 | // Encapsulate constant value queries for use in templated predicate matchers. | ||||||||||||||||
368 | // This allows checking if constants match using compound predicates and works | ||||||||||||||||
369 | // with vector constants, possibly with relaxed constraints. For example, ignore | ||||||||||||||||
370 | // undef values. | ||||||||||||||||
371 | // | ||||||||||||||||
372 | /////////////////////////////////////////////////////////////////////////////// | ||||||||||||||||
373 | |||||||||||||||||
374 | struct is_any_apint { | ||||||||||||||||
375 | bool isValue(const APInt &C) { return true; } | ||||||||||||||||
376 | }; | ||||||||||||||||
377 | /// Match an integer or vector with any integral constant. | ||||||||||||||||
378 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
379 | inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() { | ||||||||||||||||
380 | return cst_pred_ty<is_any_apint>(); | ||||||||||||||||
381 | } | ||||||||||||||||
382 | |||||||||||||||||
383 | struct is_all_ones { | ||||||||||||||||
384 | bool isValue(const APInt &C) { return C.isAllOnesValue(); } | ||||||||||||||||
385 | }; | ||||||||||||||||
386 | /// Match an integer or vector with all bits set. | ||||||||||||||||
387 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
388 | inline cst_pred_ty<is_all_ones> m_AllOnes() { | ||||||||||||||||
389 | return cst_pred_ty<is_all_ones>(); | ||||||||||||||||
390 | } | ||||||||||||||||
391 | |||||||||||||||||
392 | struct is_maxsignedvalue { | ||||||||||||||||
393 | bool isValue(const APInt &C) { return C.isMaxSignedValue(); } | ||||||||||||||||
394 | }; | ||||||||||||||||
395 | /// Match an integer or vector with values having all bits except for the high | ||||||||||||||||
396 | /// bit set (0x7f...). | ||||||||||||||||
397 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
398 | inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() { | ||||||||||||||||
399 | return cst_pred_ty<is_maxsignedvalue>(); | ||||||||||||||||
400 | } | ||||||||||||||||
401 | inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) { | ||||||||||||||||
402 | return V; | ||||||||||||||||
403 | } | ||||||||||||||||
404 | |||||||||||||||||
405 | struct is_negative { | ||||||||||||||||
406 | bool isValue(const APInt &C) { return C.isNegative(); } | ||||||||||||||||
407 | }; | ||||||||||||||||
408 | /// Match an integer or vector of negative values. | ||||||||||||||||
409 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
410 | inline cst_pred_ty<is_negative> m_Negative() { | ||||||||||||||||
411 | return cst_pred_ty<is_negative>(); | ||||||||||||||||
412 | } | ||||||||||||||||
413 | inline api_pred_ty<is_negative> m_Negative(const APInt *&V) { | ||||||||||||||||
414 | return V; | ||||||||||||||||
415 | } | ||||||||||||||||
416 | |||||||||||||||||
417 | struct is_nonnegative { | ||||||||||||||||
418 | bool isValue(const APInt &C) { return C.isNonNegative(); } | ||||||||||||||||
419 | }; | ||||||||||||||||
420 | /// Match an integer or vector of non-negative values. | ||||||||||||||||
421 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
422 | inline cst_pred_ty<is_nonnegative> m_NonNegative() { | ||||||||||||||||
423 | return cst_pred_ty<is_nonnegative>(); | ||||||||||||||||
424 | } | ||||||||||||||||
425 | inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) { | ||||||||||||||||
426 | return V; | ||||||||||||||||
427 | } | ||||||||||||||||
428 | |||||||||||||||||
429 | struct is_strictlypositive { | ||||||||||||||||
430 | bool isValue(const APInt &C) { return C.isStrictlyPositive(); } | ||||||||||||||||
431 | }; | ||||||||||||||||
432 | /// Match an integer or vector of strictly positive values. | ||||||||||||||||
433 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
434 | inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() { | ||||||||||||||||
435 | return cst_pred_ty<is_strictlypositive>(); | ||||||||||||||||
436 | } | ||||||||||||||||
437 | inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) { | ||||||||||||||||
438 | return V; | ||||||||||||||||
439 | } | ||||||||||||||||
440 | |||||||||||||||||
441 | struct is_nonpositive { | ||||||||||||||||
442 | bool isValue(const APInt &C) { return C.isNonPositive(); } | ||||||||||||||||
443 | }; | ||||||||||||||||
444 | /// Match an integer or vector of non-positive values. | ||||||||||||||||
445 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
446 | inline cst_pred_ty<is_nonpositive> m_NonPositive() { | ||||||||||||||||
447 | return cst_pred_ty<is_nonpositive>(); | ||||||||||||||||
448 | } | ||||||||||||||||
449 | inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; } | ||||||||||||||||
450 | |||||||||||||||||
451 | struct is_one { | ||||||||||||||||
452 | bool isValue(const APInt &C) { return C.isOneValue(); } | ||||||||||||||||
453 | }; | ||||||||||||||||
454 | /// Match an integer 1 or a vector with all elements equal to 1. | ||||||||||||||||
455 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
456 | inline cst_pred_ty<is_one> m_One() { | ||||||||||||||||
457 | return cst_pred_ty<is_one>(); | ||||||||||||||||
458 | } | ||||||||||||||||
459 | |||||||||||||||||
460 | struct is_zero_int { | ||||||||||||||||
461 | bool isValue(const APInt &C) { return C.isNullValue(); } | ||||||||||||||||
462 | }; | ||||||||||||||||
463 | /// Match an integer 0 or a vector with all elements equal to 0. | ||||||||||||||||
464 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
465 | inline cst_pred_ty<is_zero_int> m_ZeroInt() { | ||||||||||||||||
466 | return cst_pred_ty<is_zero_int>(); | ||||||||||||||||
467 | } | ||||||||||||||||
468 | |||||||||||||||||
469 | struct is_zero { | ||||||||||||||||
470 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
471 | auto *C = dyn_cast<Constant>(V); | ||||||||||||||||
472 | // FIXME: this should be able to do something for scalable vectors | ||||||||||||||||
473 | return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C)); | ||||||||||||||||
474 | } | ||||||||||||||||
475 | }; | ||||||||||||||||
476 | /// Match any null constant or a vector with all elements equal to 0. | ||||||||||||||||
477 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
478 | inline is_zero m_Zero() { | ||||||||||||||||
479 | return is_zero(); | ||||||||||||||||
480 | } | ||||||||||||||||
481 | |||||||||||||||||
482 | struct is_power2 { | ||||||||||||||||
483 | bool isValue(const APInt &C) { return C.isPowerOf2(); } | ||||||||||||||||
484 | }; | ||||||||||||||||
485 | /// Match an integer or vector power-of-2. | ||||||||||||||||
486 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
487 | inline cst_pred_ty<is_power2> m_Power2() { | ||||||||||||||||
488 | return cst_pred_ty<is_power2>(); | ||||||||||||||||
489 | } | ||||||||||||||||
490 | inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { | ||||||||||||||||
491 | return V; | ||||||||||||||||
492 | } | ||||||||||||||||
493 | |||||||||||||||||
494 | struct is_negated_power2 { | ||||||||||||||||
495 | bool isValue(const APInt &C) { return (-C).isPowerOf2(); } | ||||||||||||||||
496 | }; | ||||||||||||||||
497 | /// Match a integer or vector negated power-of-2. | ||||||||||||||||
498 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
499 | inline cst_pred_ty<is_negated_power2> m_NegatedPower2() { | ||||||||||||||||
500 | return cst_pred_ty<is_negated_power2>(); | ||||||||||||||||
501 | } | ||||||||||||||||
502 | inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) { | ||||||||||||||||
503 | return V; | ||||||||||||||||
504 | } | ||||||||||||||||
505 | |||||||||||||||||
506 | struct is_power2_or_zero { | ||||||||||||||||
507 | bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } | ||||||||||||||||
508 | }; | ||||||||||||||||
509 | /// Match an integer or vector of 0 or power-of-2 values. | ||||||||||||||||
510 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
511 | inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() { | ||||||||||||||||
512 | return cst_pred_ty<is_power2_or_zero>(); | ||||||||||||||||
513 | } | ||||||||||||||||
514 | inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) { | ||||||||||||||||
515 | return V; | ||||||||||||||||
516 | } | ||||||||||||||||
517 | |||||||||||||||||
518 | struct is_sign_mask { | ||||||||||||||||
519 | bool isValue(const APInt &C) { return C.isSignMask(); } | ||||||||||||||||
520 | }; | ||||||||||||||||
521 | /// Match an integer or vector with only the sign bit(s) set. | ||||||||||||||||
522 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
523 | inline cst_pred_ty<is_sign_mask> m_SignMask() { | ||||||||||||||||
524 | return cst_pred_ty<is_sign_mask>(); | ||||||||||||||||
525 | } | ||||||||||||||||
526 | |||||||||||||||||
527 | struct is_lowbit_mask { | ||||||||||||||||
528 | bool isValue(const APInt &C) { return C.isMask(); } | ||||||||||||||||
529 | }; | ||||||||||||||||
530 | /// Match an integer or vector with only the low bit(s) set. | ||||||||||||||||
531 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
532 | inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() { | ||||||||||||||||
533 | return cst_pred_ty<is_lowbit_mask>(); | ||||||||||||||||
534 | } | ||||||||||||||||
535 | |||||||||||||||||
536 | struct icmp_pred_with_threshold { | ||||||||||||||||
537 | ICmpInst::Predicate Pred; | ||||||||||||||||
538 | const APInt *Thr; | ||||||||||||||||
539 | bool isValue(const APInt &C) { | ||||||||||||||||
540 | switch (Pred) { | ||||||||||||||||
541 | case ICmpInst::Predicate::ICMP_EQ: | ||||||||||||||||
542 | return C.eq(*Thr); | ||||||||||||||||
543 | case ICmpInst::Predicate::ICMP_NE: | ||||||||||||||||
544 | return C.ne(*Thr); | ||||||||||||||||
545 | case ICmpInst::Predicate::ICMP_UGT: | ||||||||||||||||
546 | return C.ugt(*Thr); | ||||||||||||||||
547 | case ICmpInst::Predicate::ICMP_UGE: | ||||||||||||||||
548 | return C.uge(*Thr); | ||||||||||||||||
549 | case ICmpInst::Predicate::ICMP_ULT: | ||||||||||||||||
550 | return C.ult(*Thr); | ||||||||||||||||
551 | case ICmpInst::Predicate::ICMP_ULE: | ||||||||||||||||
552 | return C.ule(*Thr); | ||||||||||||||||
553 | case ICmpInst::Predicate::ICMP_SGT: | ||||||||||||||||
554 | return C.sgt(*Thr); | ||||||||||||||||
555 | case ICmpInst::Predicate::ICMP_SGE: | ||||||||||||||||
556 | return C.sge(*Thr); | ||||||||||||||||
557 | case ICmpInst::Predicate::ICMP_SLT: | ||||||||||||||||
558 | return C.slt(*Thr); | ||||||||||||||||
559 | case ICmpInst::Predicate::ICMP_SLE: | ||||||||||||||||
560 | return C.sle(*Thr); | ||||||||||||||||
561 | default: | ||||||||||||||||
562 | llvm_unreachable("Unhandled ICmp predicate")::llvm::llvm_unreachable_internal("Unhandled ICmp predicate", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/PatternMatch.h" , 562); | ||||||||||||||||
563 | } | ||||||||||||||||
564 | } | ||||||||||||||||
565 | }; | ||||||||||||||||
566 | /// Match an integer or vector with every element comparing 'pred' (eg/ne/...) | ||||||||||||||||
567 | /// to Threshold. For vectors, this includes constants with undefined elements. | ||||||||||||||||
568 | inline cst_pred_ty<icmp_pred_with_threshold> | ||||||||||||||||
569 | m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) { | ||||||||||||||||
570 | cst_pred_ty<icmp_pred_with_threshold> P; | ||||||||||||||||
571 | P.Pred = Predicate; | ||||||||||||||||
572 | P.Thr = &Threshold; | ||||||||||||||||
573 | return P; | ||||||||||||||||
574 | } | ||||||||||||||||
575 | |||||||||||||||||
576 | struct is_nan { | ||||||||||||||||
577 | bool isValue(const APFloat &C) { return C.isNaN(); } | ||||||||||||||||
578 | }; | ||||||||||||||||
579 | /// Match an arbitrary NaN constant. This includes quiet and signalling nans. | ||||||||||||||||
580 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
581 | inline cstfp_pred_ty<is_nan> m_NaN() { | ||||||||||||||||
582 | return cstfp_pred_ty<is_nan>(); | ||||||||||||||||
583 | } | ||||||||||||||||
584 | |||||||||||||||||
585 | struct is_nonnan { | ||||||||||||||||
586 | bool isValue(const APFloat &C) { return !C.isNaN(); } | ||||||||||||||||
587 | }; | ||||||||||||||||
588 | /// Match a non-NaN FP constant. | ||||||||||||||||
589 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
590 | inline cstfp_pred_ty<is_nonnan> m_NonNaN() { | ||||||||||||||||
591 | return cstfp_pred_ty<is_nonnan>(); | ||||||||||||||||
592 | } | ||||||||||||||||
593 | |||||||||||||||||
594 | struct is_inf { | ||||||||||||||||
595 | bool isValue(const APFloat &C) { return C.isInfinity(); } | ||||||||||||||||
596 | }; | ||||||||||||||||
597 | /// Match a positive or negative infinity FP constant. | ||||||||||||||||
598 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
599 | inline cstfp_pred_ty<is_inf> m_Inf() { | ||||||||||||||||
600 | return cstfp_pred_ty<is_inf>(); | ||||||||||||||||
601 | } | ||||||||||||||||
602 | |||||||||||||||||
603 | struct is_noninf { | ||||||||||||||||
604 | bool isValue(const APFloat &C) { return !C.isInfinity(); } | ||||||||||||||||
605 | }; | ||||||||||||||||
606 | /// Match a non-infinity FP constant, i.e. finite or NaN. | ||||||||||||||||
607 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
608 | inline cstfp_pred_ty<is_noninf> m_NonInf() { | ||||||||||||||||
609 | return cstfp_pred_ty<is_noninf>(); | ||||||||||||||||
610 | } | ||||||||||||||||
611 | |||||||||||||||||
612 | struct is_finite { | ||||||||||||||||
613 | bool isValue(const APFloat &C) { return C.isFinite(); } | ||||||||||||||||
614 | }; | ||||||||||||||||
615 | /// Match a finite FP constant, i.e. not infinity or NaN. | ||||||||||||||||
616 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
617 | inline cstfp_pred_ty<is_finite> m_Finite() { | ||||||||||||||||
618 | return cstfp_pred_ty<is_finite>(); | ||||||||||||||||
619 | } | ||||||||||||||||
620 | inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; } | ||||||||||||||||
621 | |||||||||||||||||
622 | struct is_finitenonzero { | ||||||||||||||||
623 | bool isValue(const APFloat &C) { return C.isFiniteNonZero(); } | ||||||||||||||||
624 | }; | ||||||||||||||||
625 | /// Match a finite non-zero FP constant. | ||||||||||||||||
626 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
627 | inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() { | ||||||||||||||||
628 | return cstfp_pred_ty<is_finitenonzero>(); | ||||||||||||||||
629 | } | ||||||||||||||||
630 | inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) { | ||||||||||||||||
631 | return V; | ||||||||||||||||
632 | } | ||||||||||||||||
633 | |||||||||||||||||
634 | struct is_any_zero_fp { | ||||||||||||||||
635 | bool isValue(const APFloat &C) { return C.isZero(); } | ||||||||||||||||
636 | }; | ||||||||||||||||
637 | /// Match a floating-point negative zero or positive zero. | ||||||||||||||||
638 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
639 | inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() { | ||||||||||||||||
640 | return cstfp_pred_ty<is_any_zero_fp>(); | ||||||||||||||||
641 | } | ||||||||||||||||
642 | |||||||||||||||||
643 | struct is_pos_zero_fp { | ||||||||||||||||
644 | bool isValue(const APFloat &C) { return C.isPosZero(); } | ||||||||||||||||
645 | }; | ||||||||||||||||
646 | /// Match a floating-point positive zero. | ||||||||||||||||
647 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
648 | inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() { | ||||||||||||||||
649 | return cstfp_pred_ty<is_pos_zero_fp>(); | ||||||||||||||||
650 | } | ||||||||||||||||
651 | |||||||||||||||||
652 | struct is_neg_zero_fp { | ||||||||||||||||
653 | bool isValue(const APFloat &C) { return C.isNegZero(); } | ||||||||||||||||
654 | }; | ||||||||||||||||
655 | /// Match a floating-point negative zero. | ||||||||||||||||
656 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
657 | inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() { | ||||||||||||||||
658 | return cstfp_pred_ty<is_neg_zero_fp>(); | ||||||||||||||||
659 | } | ||||||||||||||||
660 | |||||||||||||||||
661 | struct is_non_zero_fp { | ||||||||||||||||
662 | bool isValue(const APFloat &C) { return C.isNonZero(); } | ||||||||||||||||
663 | }; | ||||||||||||||||
664 | /// Match a floating-point non-zero. | ||||||||||||||||
665 | /// For vectors, this includes constants with undefined elements. | ||||||||||||||||
666 | inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() { | ||||||||||||||||
667 | return cstfp_pred_ty<is_non_zero_fp>(); | ||||||||||||||||
668 | } | ||||||||||||||||
669 | |||||||||||||||||
670 | /////////////////////////////////////////////////////////////////////////////// | ||||||||||||||||
671 | |||||||||||||||||
672 | template <typename Class> struct bind_ty { | ||||||||||||||||
673 | Class *&VR; | ||||||||||||||||
674 | |||||||||||||||||
675 | bind_ty(Class *&V) : VR(V) {} | ||||||||||||||||
676 | |||||||||||||||||
677 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
678 | if (auto *CV = dyn_cast<Class>(V)) { | ||||||||||||||||
679 | VR = CV; | ||||||||||||||||
680 | return true; | ||||||||||||||||
681 | } | ||||||||||||||||
682 | return false; | ||||||||||||||||
683 | } | ||||||||||||||||
684 | }; | ||||||||||||||||
685 | |||||||||||||||||
686 | /// Match a value, capturing it if we match. | ||||||||||||||||
687 | inline bind_ty<Value> m_Value(Value *&V) { return V; } | ||||||||||||||||
688 | inline bind_ty<const Value> m_Value(const Value *&V) { return V; } | ||||||||||||||||
689 | |||||||||||||||||
690 | /// Match an instruction, capturing it if we match. | ||||||||||||||||
691 | inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; } | ||||||||||||||||
692 | /// Match a unary operator, capturing it if we match. | ||||||||||||||||
693 | inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; } | ||||||||||||||||
694 | /// Match a binary operator, capturing it if we match. | ||||||||||||||||
695 | inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; } | ||||||||||||||||
696 | /// Match a with overflow intrinsic, capturing it if we match. | ||||||||||||||||
697 | inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; } | ||||||||||||||||
698 | |||||||||||||||||
699 | /// Match a ConstantInt, capturing the value if we match. | ||||||||||||||||
700 | inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } | ||||||||||||||||
701 | |||||||||||||||||
702 | /// Match a Constant, capturing the value if we match. | ||||||||||||||||
703 | inline bind_ty<Constant> m_Constant(Constant *&C) { return C; } | ||||||||||||||||
704 | |||||||||||||||||
705 | /// Match a ConstantFP, capturing the value if we match. | ||||||||||||||||
706 | inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; } | ||||||||||||||||
707 | |||||||||||||||||
708 | /// Match a basic block value, capturing it if we match. | ||||||||||||||||
709 | inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; } | ||||||||||||||||
710 | inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { | ||||||||||||||||
711 | return V; | ||||||||||||||||
712 | } | ||||||||||||||||
713 | |||||||||||||||||
714 | /// Match a specified Value*. | ||||||||||||||||
715 | struct specificval_ty { | ||||||||||||||||
716 | const Value *Val; | ||||||||||||||||
717 | |||||||||||||||||
718 | specificval_ty(const Value *V) : Val(V) {} | ||||||||||||||||
719 | |||||||||||||||||
720 | template <typename ITy> bool match(ITy *V) { return V == Val; } | ||||||||||||||||
721 | }; | ||||||||||||||||
722 | |||||||||||||||||
723 | /// Match if we have a specific specified value. | ||||||||||||||||
724 | inline specificval_ty m_Specific(const Value *V) { return V; } | ||||||||||||||||
725 | |||||||||||||||||
/// Holds a reference to the pointer variable rather than a copy of the
/// pointer, so the comparison uses whatever the variable contains at the time
/// match() runs. This is what makes it usable in commutative matchers.
template <typename Class> struct deferredval_ty {
  Class *const &Val;

  deferredval_ty(Class *const &V) : Val(V) {}

  /// Pointer-identity comparison against the current content of Val.
  template <typename ITy> bool match(ITy *const V) { return Val == V; }
};
735 | |||||||||||||||||
736 | /// A commutative-friendly version of m_Specific(). | ||||||||||||||||
737 | inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; } | ||||||||||||||||
738 | inline deferredval_ty<const Value> m_Deferred(const Value *const &V) { | ||||||||||||||||
739 | return V; | ||||||||||||||||
740 | } | ||||||||||||||||
741 | |||||||||||||||||
742 | /// Match a specified floating point value or vector of all elements of | ||||||||||||||||
743 | /// that value. | ||||||||||||||||
744 | struct specific_fpval { | ||||||||||||||||
745 | double Val; | ||||||||||||||||
746 | |||||||||||||||||
747 | specific_fpval(double V) : Val(V) {} | ||||||||||||||||
748 | |||||||||||||||||
749 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
750 | if (const auto *CFP = dyn_cast<ConstantFP>(V)) | ||||||||||||||||
751 | return CFP->isExactlyValue(Val); | ||||||||||||||||
752 | if (V->getType()->isVectorTy()) | ||||||||||||||||
753 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||||||
754 | if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) | ||||||||||||||||
755 | return CFP->isExactlyValue(Val); | ||||||||||||||||
756 | return false; | ||||||||||||||||
757 | } | ||||||||||||||||
758 | }; | ||||||||||||||||
759 | |||||||||||||||||
760 | /// Match a specific floating point value or vector with all elements | ||||||||||||||||
761 | /// equal to the value. | ||||||||||||||||
762 | inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); } | ||||||||||||||||
763 | |||||||||||||||||
764 | /// Match a float 1.0 or vector with all elements equal to 1.0. | ||||||||||||||||
765 | inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); } | ||||||||||||||||
766 | |||||||||||||||||
767 | struct bind_const_intval_ty { | ||||||||||||||||
768 | uint64_t &VR; | ||||||||||||||||
769 | |||||||||||||||||
770 | bind_const_intval_ty(uint64_t &V) : VR(V) {} | ||||||||||||||||
771 | |||||||||||||||||
772 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
773 | if (const auto *CV = dyn_cast<ConstantInt>(V)) | ||||||||||||||||
774 | if (CV->getValue().ule(UINT64_MAX(18446744073709551615UL))) { | ||||||||||||||||
775 | VR = CV->getZExtValue(); | ||||||||||||||||
776 | return true; | ||||||||||||||||
777 | } | ||||||||||||||||
778 | return false; | ||||||||||||||||
779 | } | ||||||||||||||||
780 | }; | ||||||||||||||||
781 | |||||||||||||||||
782 | /// Match a specified integer value or vector of all elements of that | ||||||||||||||||
783 | /// value. | ||||||||||||||||
784 | template <bool AllowUndefs> | ||||||||||||||||
785 | struct specific_intval { | ||||||||||||||||
786 | APInt Val; | ||||||||||||||||
787 | |||||||||||||||||
788 | specific_intval(APInt V) : Val(std::move(V)) {} | ||||||||||||||||
789 | |||||||||||||||||
790 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
791 | const auto *CI = dyn_cast<ConstantInt>(V); | ||||||||||||||||
792 | if (!CI && V->getType()->isVectorTy()) | ||||||||||||||||
793 | if (const auto *C = dyn_cast<Constant>(V)) | ||||||||||||||||
794 | CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs)); | ||||||||||||||||
795 | |||||||||||||||||
796 | return CI && APInt::isSameValue(CI->getValue(), Val); | ||||||||||||||||
797 | } | ||||||||||||||||
798 | }; | ||||||||||||||||
799 | |||||||||||||||||
800 | /// Match a specific integer value or vector with all elements equal to | ||||||||||||||||
801 | /// the value. | ||||||||||||||||
802 | inline specific_intval<false> m_SpecificInt(APInt V) { | ||||||||||||||||
803 | return specific_intval<false>(std::move(V)); | ||||||||||||||||
804 | } | ||||||||||||||||
805 | |||||||||||||||||
806 | inline specific_intval<false> m_SpecificInt(uint64_t V) { | ||||||||||||||||
807 | return m_SpecificInt(APInt(64, V)); | ||||||||||||||||
808 | } | ||||||||||||||||
809 | |||||||||||||||||
810 | inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) { | ||||||||||||||||
811 | return specific_intval<true>(std::move(V)); | ||||||||||||||||
812 | } | ||||||||||||||||
813 | |||||||||||||||||
814 | inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) { | ||||||||||||||||
815 | return m_SpecificIntAllowUndef(APInt(64, V)); | ||||||||||||||||
816 | } | ||||||||||||||||
817 | |||||||||||||||||
818 | /// Match a ConstantInt and bind to its zero-extended value. This does not | ||||||||||||||||
819 | /// match ConstantInts whose value does not fit in 64 bits. | ||||||||||||||||
820 | inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } | ||||||||||||||||
821 | |||||||||||||||||
822 | /// Match a specified basic block value. | ||||||||||||||||
823 | struct specific_bbval { | ||||||||||||||||
824 | BasicBlock *Val; | ||||||||||||||||
825 | |||||||||||||||||
826 | specific_bbval(BasicBlock *Val) : Val(Val) {} | ||||||||||||||||
827 | |||||||||||||||||
828 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
829 | const auto *BB = dyn_cast<BasicBlock>(V); | ||||||||||||||||
830 | return BB && BB == Val; | ||||||||||||||||
831 | } | ||||||||||||||||
832 | }; | ||||||||||||||||
833 | |||||||||||||||||
834 | /// Match a specific basic block value. | ||||||||||||||||
835 | inline specific_bbval m_SpecificBB(BasicBlock *BB) { | ||||||||||||||||
836 | return specific_bbval(BB); | ||||||||||||||||
837 | } | ||||||||||||||||
838 | |||||||||||||||||
839 | /// A commutative-friendly version of m_Specific(). | ||||||||||||||||
840 | inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) { | ||||||||||||||||
841 | return BB; | ||||||||||||||||
842 | } | ||||||||||||||||
843 | inline deferredval_ty<const BasicBlock> | ||||||||||||||||
844 | m_Deferred(const BasicBlock *const &BB) { | ||||||||||||||||
845 | return BB; | ||||||||||||||||
846 | } | ||||||||||||||||
847 | |||||||||||||||||
848 | //===----------------------------------------------------------------------===// | ||||||||||||||||
849 | // Matcher for any binary operator. | ||||||||||||||||
850 | // | ||||||||||||||||
851 | template <typename LHS_t, typename RHS_t, bool Commutable = false> | ||||||||||||||||
852 | struct AnyBinaryOp_match { | ||||||||||||||||
853 | LHS_t L; | ||||||||||||||||
854 | RHS_t R; | ||||||||||||||||
855 | |||||||||||||||||
856 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||||||
857 | // The LHS is always matched first. | ||||||||||||||||
858 | AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||||||
859 | |||||||||||||||||
860 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
861 | if (auto *I = dyn_cast<BinaryOperator>(V)) | ||||||||||||||||
862 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | ||||||||||||||||
863 | (Commutable && L.match(I->getOperand(1)) && | ||||||||||||||||
864 | R.match(I->getOperand(0))); | ||||||||||||||||
865 | return false; | ||||||||||||||||
866 | } | ||||||||||||||||
867 | }; | ||||||||||||||||
868 | |||||||||||||||||
869 | template <typename LHS, typename RHS> | ||||||||||||||||
870 | inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) { | ||||||||||||||||
871 | return AnyBinaryOp_match<LHS, RHS>(L, R); | ||||||||||||||||
872 | } | ||||||||||||||||
873 | |||||||||||||||||
874 | //===----------------------------------------------------------------------===// | ||||||||||||||||
875 | // Matcher for any unary operator. | ||||||||||||||||
876 | // TODO fuse unary, binary matcher into n-ary matcher | ||||||||||||||||
877 | // | ||||||||||||||||
878 | template <typename OP_t> struct AnyUnaryOp_match { | ||||||||||||||||
879 | OP_t X; | ||||||||||||||||
880 | |||||||||||||||||
881 | AnyUnaryOp_match(const OP_t &X) : X(X) {} | ||||||||||||||||
882 | |||||||||||||||||
883 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
884 | if (auto *I = dyn_cast<UnaryOperator>(V)) | ||||||||||||||||
885 | return X.match(I->getOperand(0)); | ||||||||||||||||
886 | return false; | ||||||||||||||||
887 | } | ||||||||||||||||
888 | }; | ||||||||||||||||
889 | |||||||||||||||||
890 | template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) { | ||||||||||||||||
891 | return AnyUnaryOp_match<OP_t>(X); | ||||||||||||||||
892 | } | ||||||||||||||||
893 | |||||||||||||||||
894 | //===----------------------------------------------------------------------===// | ||||||||||||||||
895 | // Matchers for specific binary operators. | ||||||||||||||||
896 | // | ||||||||||||||||
897 | |||||||||||||||||
898 | template <typename LHS_t, typename RHS_t, unsigned Opcode, | ||||||||||||||||
899 | bool Commutable = false> | ||||||||||||||||
900 | struct BinaryOp_match { | ||||||||||||||||
901 | LHS_t L; | ||||||||||||||||
902 | RHS_t R; | ||||||||||||||||
903 | |||||||||||||||||
904 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||||||
905 | // The LHS is always matched first. | ||||||||||||||||
906 | BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||||||
907 | |||||||||||||||||
908 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
909 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||||||
910 | auto *I = cast<BinaryOperator>(V); | ||||||||||||||||
911 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | ||||||||||||||||
912 | (Commutable && L.match(I->getOperand(1)) && | ||||||||||||||||
913 | R.match(I->getOperand(0))); | ||||||||||||||||
914 | } | ||||||||||||||||
915 | if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||||||
916 | return CE->getOpcode() == Opcode && | ||||||||||||||||
917 | ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || | ||||||||||||||||
918 | (Commutable && L.match(CE->getOperand(1)) && | ||||||||||||||||
919 | R.match(CE->getOperand(0)))); | ||||||||||||||||
920 | return false; | ||||||||||||||||
921 | } | ||||||||||||||||
922 | }; | ||||||||||||||||
923 | |||||||||||||||||
924 | template <typename LHS, typename RHS> | ||||||||||||||||
925 | inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L, | ||||||||||||||||
926 | const RHS &R) { | ||||||||||||||||
927 | return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R); | ||||||||||||||||
928 | } | ||||||||||||||||
929 | |||||||||||||||||
930 | template <typename LHS, typename RHS> | ||||||||||||||||
931 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L, | ||||||||||||||||
932 | const RHS &R) { | ||||||||||||||||
933 | return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R); | ||||||||||||||||
934 | } | ||||||||||||||||
935 | |||||||||||||||||
936 | template <typename LHS, typename RHS> | ||||||||||||||||
937 | inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L, | ||||||||||||||||
938 | const RHS &R) { | ||||||||||||||||
939 | return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R); | ||||||||||||||||
940 | } | ||||||||||||||||
941 | |||||||||||||||||
942 | template <typename LHS, typename RHS> | ||||||||||||||||
943 | inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L, | ||||||||||||||||
944 | const RHS &R) { | ||||||||||||||||
945 | return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R); | ||||||||||||||||
946 | } | ||||||||||||||||
947 | |||||||||||||||||
948 | template <typename Op_t> struct FNeg_match { | ||||||||||||||||
949 | Op_t X; | ||||||||||||||||
950 | |||||||||||||||||
951 | FNeg_match(const Op_t &Op) : X(Op) {} | ||||||||||||||||
952 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
953 | auto *FPMO = dyn_cast<FPMathOperator>(V); | ||||||||||||||||
954 | if (!FPMO) return false; | ||||||||||||||||
955 | |||||||||||||||||
956 | if (FPMO->getOpcode() == Instruction::FNeg) | ||||||||||||||||
957 | return X.match(FPMO->getOperand(0)); | ||||||||||||||||
958 | |||||||||||||||||
959 | if (FPMO->getOpcode() == Instruction::FSub) { | ||||||||||||||||
960 | if (FPMO->hasNoSignedZeros()) { | ||||||||||||||||
961 | // With 'nsz', any zero goes. | ||||||||||||||||
962 | if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0))) | ||||||||||||||||
963 | return false; | ||||||||||||||||
964 | } else { | ||||||||||||||||
965 | // Without 'nsz', we need fsub -0.0, X exactly. | ||||||||||||||||
966 | if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0))) | ||||||||||||||||
967 | return false; | ||||||||||||||||
968 | } | ||||||||||||||||
969 | |||||||||||||||||
970 | return X.match(FPMO->getOperand(1)); | ||||||||||||||||
971 | } | ||||||||||||||||
972 | |||||||||||||||||
973 | return false; | ||||||||||||||||
974 | } | ||||||||||||||||
975 | }; | ||||||||||||||||
976 | |||||||||||||||||
977 | /// Match 'fneg X', or 'fsub -0.0, X' (any zero is accepted with 'nsz'). | ||||||||||||||||
978 | template <typename OpTy> | ||||||||||||||||
979 | inline FNeg_match<OpTy> | ||||||||||||||||
980 | m_FNeg(const OpTy &X) { | ||||||||||||||||
981 | return FNeg_match<OpTy>(X); | ||||||||||||||||
982 | } | ||||||||||||||||
983 | |||||||||||||||||
984 | /// Match 'fneg X' as 'fsub +-0.0, X'. | ||||||||||||||||
985 | template <typename RHS> | ||||||||||||||||
986 | inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub> | ||||||||||||||||
987 | m_FNegNSZ(const RHS &X) { | ||||||||||||||||
988 | return m_FSub(m_AnyZeroFP(), X); | ||||||||||||||||
989 | } | ||||||||||||||||
990 | |||||||||||||||||
991 | template <typename LHS, typename RHS> | ||||||||||||||||
992 | inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L, | ||||||||||||||||
993 | const RHS &R) { | ||||||||||||||||
994 | return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R); | ||||||||||||||||
995 | } | ||||||||||||||||
996 | |||||||||||||||||
997 | template <typename LHS, typename RHS> | ||||||||||||||||
998 | inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L, | ||||||||||||||||
999 | const RHS &R) { | ||||||||||||||||
1000 | return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R); | ||||||||||||||||
1001 | } | ||||||||||||||||
1002 | |||||||||||||||||
1003 | template <typename LHS, typename RHS> | ||||||||||||||||
1004 | inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L, | ||||||||||||||||
1005 | const RHS &R) { | ||||||||||||||||
1006 | return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R); | ||||||||||||||||
1007 | } | ||||||||||||||||
1008 | |||||||||||||||||
1009 | template <typename LHS, typename RHS> | ||||||||||||||||
1010 | inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L, | ||||||||||||||||
1011 | const RHS &R) { | ||||||||||||||||
1012 | return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R); | ||||||||||||||||
1013 | } | ||||||||||||||||
1014 | |||||||||||||||||
1015 | template <typename LHS, typename RHS> | ||||||||||||||||
1016 | inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L, | ||||||||||||||||
1017 | const RHS &R) { | ||||||||||||||||
1018 | return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R); | ||||||||||||||||
1019 | } | ||||||||||||||||
1020 | |||||||||||||||||
1021 | template <typename LHS, typename RHS> | ||||||||||||||||
1022 | inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L, | ||||||||||||||||
1023 | const RHS &R) { | ||||||||||||||||
1024 | return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R); | ||||||||||||||||
1025 | } | ||||||||||||||||
1026 | |||||||||||||||||
1027 | template <typename LHS, typename RHS> | ||||||||||||||||
1028 | inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L, | ||||||||||||||||
1029 | const RHS &R) { | ||||||||||||||||
1030 | return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R); | ||||||||||||||||
1031 | } | ||||||||||||||||
1032 | |||||||||||||||||
1033 | template <typename LHS, typename RHS> | ||||||||||||||||
1034 | inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L, | ||||||||||||||||
1035 | const RHS &R) { | ||||||||||||||||
1036 | return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R); | ||||||||||||||||
1037 | } | ||||||||||||||||
1038 | |||||||||||||||||
1039 | template <typename LHS, typename RHS> | ||||||||||||||||
1040 | inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L, | ||||||||||||||||
1041 | const RHS &R) { | ||||||||||||||||
1042 | return BinaryOp_match<LHS, RHS, Instruction::And>(L, R); | ||||||||||||||||
1043 | } | ||||||||||||||||
1044 | |||||||||||||||||
1045 | template <typename LHS, typename RHS> | ||||||||||||||||
1046 | inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L, | ||||||||||||||||
1047 | const RHS &R) { | ||||||||||||||||
1048 | return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R); | ||||||||||||||||
1049 | } | ||||||||||||||||
1050 | |||||||||||||||||
1051 | template <typename LHS, typename RHS> | ||||||||||||||||
1052 | inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L, | ||||||||||||||||
1053 | const RHS &R) { | ||||||||||||||||
1054 | return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R); | ||||||||||||||||
1055 | } | ||||||||||||||||
1056 | |||||||||||||||||
1057 | template <typename LHS, typename RHS> | ||||||||||||||||
1058 | inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L, | ||||||||||||||||
1059 | const RHS &R) { | ||||||||||||||||
1060 | return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R); | ||||||||||||||||
1061 | } | ||||||||||||||||
1062 | |||||||||||||||||
1063 | template <typename LHS, typename RHS> | ||||||||||||||||
1064 | inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L, | ||||||||||||||||
1065 | const RHS &R) { | ||||||||||||||||
1066 | return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R); | ||||||||||||||||
1067 | } | ||||||||||||||||
1068 | |||||||||||||||||
1069 | template <typename LHS, typename RHS> | ||||||||||||||||
1070 | inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L, | ||||||||||||||||
1071 | const RHS &R) { | ||||||||||||||||
1072 | return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R); | ||||||||||||||||
1073 | } | ||||||||||||||||
1074 | |||||||||||||||||
1075 | template <typename LHS_t, typename RHS_t, unsigned Opcode, | ||||||||||||||||
1076 | unsigned WrapFlags = 0> | ||||||||||||||||
1077 | struct OverflowingBinaryOp_match { | ||||||||||||||||
1078 | LHS_t L; | ||||||||||||||||
1079 | RHS_t R; | ||||||||||||||||
1080 | |||||||||||||||||
1081 | OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) | ||||||||||||||||
1082 | : L(LHS), R(RHS) {} | ||||||||||||||||
1083 | |||||||||||||||||
1084 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1085 | if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) { | ||||||||||||||||
1086 | if (Op->getOpcode() != Opcode) | ||||||||||||||||
1087 | return false; | ||||||||||||||||
1088 | if (WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap && | ||||||||||||||||
1089 | !Op->hasNoUnsignedWrap()) | ||||||||||||||||
1090 | return false; | ||||||||||||||||
1091 | if (WrapFlags & OverflowingBinaryOperator::NoSignedWrap && | ||||||||||||||||
1092 | !Op->hasNoSignedWrap()) | ||||||||||||||||
1093 | return false; | ||||||||||||||||
1094 | return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1)); | ||||||||||||||||
1095 | } | ||||||||||||||||
1096 | return false; | ||||||||||||||||
1097 | } | ||||||||||||||||
1098 | }; | ||||||||||||||||
1099 | |||||||||||||||||
1100 | template <typename LHS, typename RHS> | ||||||||||||||||
1101 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||||||
1102 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||||||
1103 | m_NSWAdd(const LHS &L, const RHS &R) { | ||||||||||||||||
1104 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||||||
1105 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||||||
1106 | L, R); | ||||||||||||||||
1107 | } | ||||||||||||||||
1108 | template <typename LHS, typename RHS> | ||||||||||||||||
1109 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||||||
1110 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||||||
1111 | m_NSWSub(const LHS &L, const RHS &R) { | ||||||||||||||||
1112 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||||||
1113 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||||||
1114 | L, R); | ||||||||||||||||
1115 | } | ||||||||||||||||
1116 | template <typename LHS, typename RHS> | ||||||||||||||||
1117 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||||||
1118 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||||||
1119 | m_NSWMul(const LHS &L, const RHS &R) { | ||||||||||||||||
1120 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||||||
1121 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||||||
1122 | L, R); | ||||||||||||||||
1123 | } | ||||||||||||||||
1124 | template <typename LHS, typename RHS> | ||||||||||||||||
1125 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||||||
1126 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||||||
1127 | m_NSWShl(const LHS &L, const RHS &R) { | ||||||||||||||||
1128 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||||||
1129 | OverflowingBinaryOperator::NoSignedWrap>( | ||||||||||||||||
1130 | L, R); | ||||||||||||||||
1131 | } | ||||||||||||||||
1132 | |||||||||||||||||
1133 | template <typename LHS, typename RHS> | ||||||||||||||||
1134 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||||||
1135 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||||||
1136 | m_NUWAdd(const LHS &L, const RHS &R) { | ||||||||||||||||
1137 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||||||||||||||
1138 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||||||
1139 | L, R); | ||||||||||||||||
1140 | } | ||||||||||||||||
1141 | template <typename LHS, typename RHS> | ||||||||||||||||
1142 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||||||
1143 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||||||
1144 | m_NUWSub(const LHS &L, const RHS &R) { | ||||||||||||||||
1145 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||||||||||||||
1146 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||||||
1147 | L, R); | ||||||||||||||||
1148 | } | ||||||||||||||||
1149 | template <typename LHS, typename RHS> | ||||||||||||||||
1150 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||||||
1151 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||||||
1152 | m_NUWMul(const LHS &L, const RHS &R) { | ||||||||||||||||
1153 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||||||||||||||
1154 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||||||
1155 | L, R); | ||||||||||||||||
1156 | } | ||||||||||||||||
1157 | template <typename LHS, typename RHS> | ||||||||||||||||
1158 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||||||
1159 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||||||||||||||
1160 | m_NUWShl(const LHS &L, const RHS &R) { | ||||||||||||||||
1161 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||||||||||||||
1162 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||||||||||||||
1163 | L, R); | ||||||||||||||||
1164 | } | ||||||||||||||||
1165 | |||||||||||||||||
1166 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1167 | // Class that matches a group of binary opcodes. | ||||||||||||||||
1168 | // | ||||||||||||||||
1169 | template <typename LHS_t, typename RHS_t, typename Predicate> | ||||||||||||||||
1170 | struct BinOpPred_match : Predicate { | ||||||||||||||||
1171 | LHS_t L; | ||||||||||||||||
1172 | RHS_t R; | ||||||||||||||||
1173 | |||||||||||||||||
1174 | BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||||||
1175 | |||||||||||||||||
1176 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1177 | if (auto *I = dyn_cast<Instruction>(V)) | ||||||||||||||||
1178 | return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) && | ||||||||||||||||
1179 | R.match(I->getOperand(1)); | ||||||||||||||||
1180 | if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||||||||||||||
1181 | return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) && | ||||||||||||||||
1182 | R.match(CE->getOperand(1)); | ||||||||||||||||
1183 | return false; | ||||||||||||||||
1184 | } | ||||||||||||||||
1185 | }; | ||||||||||||||||
1186 | |||||||||||||||||
1187 | struct is_shift_op { | ||||||||||||||||
1188 | bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); } | ||||||||||||||||
1189 | }; | ||||||||||||||||
1190 | |||||||||||||||||
1191 | struct is_right_shift_op { | ||||||||||||||||
1192 | bool isOpType(unsigned Opcode) { | ||||||||||||||||
1193 | return Opcode == Instruction::LShr || Opcode == Instruction::AShr; | ||||||||||||||||
1194 | } | ||||||||||||||||
1195 | }; | ||||||||||||||||
1196 | |||||||||||||||||
1197 | struct is_logical_shift_op { | ||||||||||||||||
1198 | bool isOpType(unsigned Opcode) { | ||||||||||||||||
1199 | return Opcode == Instruction::LShr || Opcode == Instruction::Shl; | ||||||||||||||||
1200 | } | ||||||||||||||||
1201 | }; | ||||||||||||||||
1202 | |||||||||||||||||
1203 | struct is_bitwiselogic_op { | ||||||||||||||||
1204 | bool isOpType(unsigned Opcode) { | ||||||||||||||||
1205 | return Instruction::isBitwiseLogicOp(Opcode); | ||||||||||||||||
1206 | } | ||||||||||||||||
1207 | }; | ||||||||||||||||
1208 | |||||||||||||||||
1209 | struct is_idiv_op { | ||||||||||||||||
1210 | bool isOpType(unsigned Opcode) { | ||||||||||||||||
1211 | return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; | ||||||||||||||||
1212 | } | ||||||||||||||||
1213 | }; | ||||||||||||||||
1214 | |||||||||||||||||
1215 | struct is_irem_op { | ||||||||||||||||
1216 | bool isOpType(unsigned Opcode) { | ||||||||||||||||
1217 | return Opcode == Instruction::SRem || Opcode == Instruction::URem; | ||||||||||||||||
1218 | } | ||||||||||||||||
1219 | }; | ||||||||||||||||
1220 | |||||||||||||||||
1221 | /// Matches shift operations. | ||||||||||||||||
1222 | template <typename LHS, typename RHS> | ||||||||||||||||
1223 | inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L, | ||||||||||||||||
1224 | const RHS &R) { | ||||||||||||||||
1225 | return BinOpPred_match<LHS, RHS, is_shift_op>(L, R); | ||||||||||||||||
1226 | } | ||||||||||||||||
1227 | |||||||||||||||||
1228 | /// Matches right shift operations (LShr or AShr). | ||||||||||||||||
1229 | template <typename LHS, typename RHS> | ||||||||||||||||
1230 | inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L, | ||||||||||||||||
1231 | const RHS &R) { | ||||||||||||||||
1232 | return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R); | ||||||||||||||||
1233 | } | ||||||||||||||||
1234 | |||||||||||||||||
1235 | /// Matches logical shift operations. | ||||||||||||||||
1236 | template <typename LHS, typename RHS> | ||||||||||||||||
1237 | inline BinOpPred_match<LHS, RHS, is_logical_shift_op> | ||||||||||||||||
1238 | m_LogicalShift(const LHS &L, const RHS &R) { | ||||||||||||||||
1239 | return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R); | ||||||||||||||||
1240 | } | ||||||||||||||||
1241 | |||||||||||||||||
1242 | /// Matches bitwise logic operations. | ||||||||||||||||
1243 | template <typename LHS, typename RHS> | ||||||||||||||||
1244 | inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op> | ||||||||||||||||
1245 | m_BitwiseLogic(const LHS &L, const RHS &R) { | ||||||||||||||||
1246 | return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R); | ||||||||||||||||
1247 | } | ||||||||||||||||
1248 | |||||||||||||||||
1249 | /// Matches integer division operations. | ||||||||||||||||
1250 | template <typename LHS, typename RHS> | ||||||||||||||||
1251 | inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L, | ||||||||||||||||
1252 | const RHS &R) { | ||||||||||||||||
1253 | return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R); | ||||||||||||||||
1254 | } | ||||||||||||||||
1255 | |||||||||||||||||
1256 | /// Matches integer remainder operations. | ||||||||||||||||
1257 | template <typename LHS, typename RHS> | ||||||||||||||||
1258 | inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L, | ||||||||||||||||
1259 | const RHS &R) { | ||||||||||||||||
1260 | return BinOpPred_match<LHS, RHS, is_irem_op>(L, R); | ||||||||||||||||
1261 | } | ||||||||||||||||
1262 | |||||||||||||||||
1263 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1264 | // Class that matches exact binary ops. | ||||||||||||||||
1265 | // | ||||||||||||||||
1266 | template <typename SubPattern_t> struct Exact_match { | ||||||||||||||||
1267 | SubPattern_t SubPattern; | ||||||||||||||||
1268 | |||||||||||||||||
1269 | Exact_match(const SubPattern_t &SP) : SubPattern(SP) {} | ||||||||||||||||
1270 | |||||||||||||||||
1271 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1272 | if (auto *PEO = dyn_cast<PossiblyExactOperator>(V)) | ||||||||||||||||
1273 | return PEO->isExact() && SubPattern.match(V); | ||||||||||||||||
1274 | return false; | ||||||||||||||||
1275 | } | ||||||||||||||||
1276 | }; | ||||||||||||||||
1277 | |||||||||||||||||
1278 | template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) { | ||||||||||||||||
1279 | return SubPattern; | ||||||||||||||||
1280 | } | ||||||||||||||||
1281 | |||||||||||||||||
1282 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1283 | // Matchers for CmpInst classes | ||||||||||||||||
1284 | // | ||||||||||||||||
1285 | |||||||||||||||||
1286 | template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, | ||||||||||||||||
1287 | bool Commutable = false> | ||||||||||||||||
1288 | struct CmpClass_match { | ||||||||||||||||
1289 | PredicateTy &Predicate; | ||||||||||||||||
1290 | LHS_t L; | ||||||||||||||||
1291 | RHS_t R; | ||||||||||||||||
1292 | |||||||||||||||||
1293 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||||||
1294 | // The LHS is always matched first. | ||||||||||||||||
1295 | CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) | ||||||||||||||||
1296 | : Predicate(Pred), L(LHS), R(RHS) {} | ||||||||||||||||
1297 | |||||||||||||||||
1298 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1299 | if (auto *I = dyn_cast<Class>(V)) { | ||||||||||||||||
1300 | if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { | ||||||||||||||||
1301 | Predicate = I->getPredicate(); | ||||||||||||||||
1302 | return true; | ||||||||||||||||
1303 | } else if (Commutable && L.match(I->getOperand(1)) && | ||||||||||||||||
1304 | R.match(I->getOperand(0))) { | ||||||||||||||||
1305 | Predicate = I->getSwappedPredicate(); | ||||||||||||||||
1306 | return true; | ||||||||||||||||
1307 | } | ||||||||||||||||
1308 | } | ||||||||||||||||
1309 | return false; | ||||||||||||||||
1310 | } | ||||||||||||||||
1311 | }; | ||||||||||||||||
1312 | |||||||||||||||||
1313 | template <typename LHS, typename RHS> | ||||||||||||||||
1314 | inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> | ||||||||||||||||
1315 | m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||||||
1316 | return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R); | ||||||||||||||||
1317 | } | ||||||||||||||||
1318 | |||||||||||||||||
1319 | template <typename LHS, typename RHS> | ||||||||||||||||
1320 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> | ||||||||||||||||
1321 | m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||||||
1322 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R); | ||||||||||||||||
1323 | } | ||||||||||||||||
1324 | |||||||||||||||||
1325 | template <typename LHS, typename RHS> | ||||||||||||||||
1326 | inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> | ||||||||||||||||
1327 | m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||||||
1328 | return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R); | ||||||||||||||||
1329 | } | ||||||||||||||||
1330 | |||||||||||||||||
1331 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1332 | // Matchers for instructions with a given opcode and number of operands. | ||||||||||||||||
1333 | // | ||||||||||||||||
1334 | |||||||||||||||||
1335 | /// Matches instructions with Opcode and one operand. | ||||||||||||||||
1336 | template <typename T0, unsigned Opcode> struct OneOps_match { | ||||||||||||||||
1337 | T0 Op1; | ||||||||||||||||
1338 | |||||||||||||||||
1339 | OneOps_match(const T0 &Op1) : Op1(Op1) {} | ||||||||||||||||
1340 | |||||||||||||||||
1341 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1342 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||||||
1343 | auto *I = cast<Instruction>(V); | ||||||||||||||||
1344 | return Op1.match(I->getOperand(0)); | ||||||||||||||||
1345 | } | ||||||||||||||||
1346 | return false; | ||||||||||||||||
1347 | } | ||||||||||||||||
1348 | }; | ||||||||||||||||
1349 | |||||||||||||||||
1350 | /// Matches instructions with Opcode and three operands. | ||||||||||||||||
1351 | template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match { | ||||||||||||||||
1352 | T0 Op1; | ||||||||||||||||
1353 | T1 Op2; | ||||||||||||||||
1354 | |||||||||||||||||
1355 | TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {} | ||||||||||||||||
1356 | |||||||||||||||||
1357 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1358 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||||||
1359 | auto *I = cast<Instruction>(V); | ||||||||||||||||
1360 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)); | ||||||||||||||||
1361 | } | ||||||||||||||||
1362 | return false; | ||||||||||||||||
1363 | } | ||||||||||||||||
1364 | }; | ||||||||||||||||
1365 | |||||||||||||||||
1366 | /// Matches instructions with Opcode and three operands. | ||||||||||||||||
1367 | template <typename T0, typename T1, typename T2, unsigned Opcode> | ||||||||||||||||
1368 | struct ThreeOps_match { | ||||||||||||||||
1369 | T0 Op1; | ||||||||||||||||
1370 | T1 Op2; | ||||||||||||||||
1371 | T2 Op3; | ||||||||||||||||
1372 | |||||||||||||||||
1373 | ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3) | ||||||||||||||||
1374 | : Op1(Op1), Op2(Op2), Op3(Op3) {} | ||||||||||||||||
1375 | |||||||||||||||||
1376 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1377 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||||||||||||||
1378 | auto *I = cast<Instruction>(V); | ||||||||||||||||
1379 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && | ||||||||||||||||
1380 | Op3.match(I->getOperand(2)); | ||||||||||||||||
1381 | } | ||||||||||||||||
1382 | return false; | ||||||||||||||||
1383 | } | ||||||||||||||||
1384 | }; | ||||||||||||||||
1385 | |||||||||||||||||
1386 | /// Matches SelectInst. | ||||||||||||||||
1387 | template <typename Cond, typename LHS, typename RHS> | ||||||||||||||||
1388 | inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> | ||||||||||||||||
1389 | m_Select(const Cond &C, const LHS &L, const RHS &R) { | ||||||||||||||||
1390 | return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R); | ||||||||||||||||
1391 | } | ||||||||||||||||
1392 | |||||||||||||||||
1393 | /// This matches a select of two constants, e.g.: | ||||||||||||||||
1394 | /// m_SelectCst<-1, 0>(m_Value(V)) | ||||||||||||||||
1395 | template <int64_t L, int64_t R, typename Cond> | ||||||||||||||||
1396 | inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>, | ||||||||||||||||
1397 | Instruction::Select> | ||||||||||||||||
1398 | m_SelectCst(const Cond &C) { | ||||||||||||||||
1399 | return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>()); | ||||||||||||||||
1400 | } | ||||||||||||||||
1401 | |||||||||||||||||
1402 | /// Matches FreezeInst. | ||||||||||||||||
1403 | template <typename OpTy> | ||||||||||||||||
1404 | inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) { | ||||||||||||||||
1405 | return OneOps_match<OpTy, Instruction::Freeze>(Op); | ||||||||||||||||
1406 | } | ||||||||||||||||
1407 | |||||||||||||||||
1408 | /// Matches InsertElementInst. | ||||||||||||||||
1409 | template <typename Val_t, typename Elt_t, typename Idx_t> | ||||||||||||||||
1410 | inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement> | ||||||||||||||||
1411 | m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) { | ||||||||||||||||
1412 | return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>( | ||||||||||||||||
1413 | Val, Elt, Idx); | ||||||||||||||||
1414 | } | ||||||||||||||||
1415 | |||||||||||||||||
1416 | /// Matches ExtractElementInst. | ||||||||||||||||
1417 | template <typename Val_t, typename Idx_t> | ||||||||||||||||
1418 | inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement> | ||||||||||||||||
1419 | m_ExtractElt(const Val_t &Val, const Idx_t &Idx) { | ||||||||||||||||
1420 | return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx); | ||||||||||||||||
1421 | } | ||||||||||||||||
1422 | |||||||||||||||||
1423 | /// Matches shuffle. | ||||||||||||||||
1424 | template <typename T0, typename T1, typename T2> struct Shuffle_match { | ||||||||||||||||
1425 | T0 Op1; | ||||||||||||||||
1426 | T1 Op2; | ||||||||||||||||
1427 | T2 Mask; | ||||||||||||||||
1428 | |||||||||||||||||
1429 | Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask) | ||||||||||||||||
1430 | : Op1(Op1), Op2(Op2), Mask(Mask) {} | ||||||||||||||||
1431 | |||||||||||||||||
1432 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1433 | if (auto *I = dyn_cast<ShuffleVectorInst>(V)) { | ||||||||||||||||
1434 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && | ||||||||||||||||
1435 | Mask.match(I->getShuffleMask()); | ||||||||||||||||
1436 | } | ||||||||||||||||
1437 | return false; | ||||||||||||||||
1438 | } | ||||||||||||||||
1439 | }; | ||||||||||||||||
1440 | |||||||||||||||||
1441 | struct m_Mask { | ||||||||||||||||
1442 | ArrayRef<int> &MaskRef; | ||||||||||||||||
1443 | m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} | ||||||||||||||||
1444 | bool match(ArrayRef<int> Mask) { | ||||||||||||||||
1445 | MaskRef = Mask; | ||||||||||||||||
1446 | return true; | ||||||||||||||||
1447 | } | ||||||||||||||||
1448 | }; | ||||||||||||||||
1449 | |||||||||||||||||
1450 | struct m_ZeroMask { | ||||||||||||||||
1451 | bool match(ArrayRef<int> Mask) { | ||||||||||||||||
1452 | return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; }); | ||||||||||||||||
1453 | } | ||||||||||||||||
1454 | }; | ||||||||||||||||
1455 | |||||||||||||||||
1456 | struct m_SpecificMask { | ||||||||||||||||
1457 | ArrayRef<int> &MaskRef; | ||||||||||||||||
1458 | m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} | ||||||||||||||||
1459 | bool match(ArrayRef<int> Mask) { return MaskRef == Mask; } | ||||||||||||||||
1460 | }; | ||||||||||||||||
1461 | |||||||||||||||||
1462 | struct m_SplatOrUndefMask { | ||||||||||||||||
1463 | int &SplatIndex; | ||||||||||||||||
1464 | m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {} | ||||||||||||||||
1465 | bool match(ArrayRef<int> Mask) { | ||||||||||||||||
1466 | auto First = find_if(Mask, [](int Elem) { return Elem != -1; }); | ||||||||||||||||
1467 | if (First == Mask.end()) | ||||||||||||||||
1468 | return false; | ||||||||||||||||
1469 | SplatIndex = *First; | ||||||||||||||||
1470 | return all_of(Mask, | ||||||||||||||||
1471 | [First](int Elem) { return Elem == *First || Elem == -1; }); | ||||||||||||||||
1472 | } | ||||||||||||||||
1473 | }; | ||||||||||||||||
1474 | |||||||||||||||||
1475 | /// Matches ShuffleVectorInst independently of mask value. | ||||||||||||||||
1476 | template <typename V1_t, typename V2_t> | ||||||||||||||||
1477 | inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector> | ||||||||||||||||
1478 | m_Shuffle(const V1_t &v1, const V2_t &v2) { | ||||||||||||||||
1479 | return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2); | ||||||||||||||||
1480 | } | ||||||||||||||||
1481 | |||||||||||||||||
1482 | template <typename V1_t, typename V2_t, typename Mask_t> | ||||||||||||||||
1483 | inline Shuffle_match<V1_t, V2_t, Mask_t> | ||||||||||||||||
1484 | m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) { | ||||||||||||||||
1485 | return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask); | ||||||||||||||||
1486 | } | ||||||||||||||||
1487 | |||||||||||||||||
1488 | /// Matches LoadInst. | ||||||||||||||||
1489 | template <typename OpTy> | ||||||||||||||||
1490 | inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) { | ||||||||||||||||
1491 | return OneOps_match<OpTy, Instruction::Load>(Op); | ||||||||||||||||
1492 | } | ||||||||||||||||
1493 | |||||||||||||||||
1494 | /// Matches StoreInst. | ||||||||||||||||
1495 | template <typename ValueOpTy, typename PointerOpTy> | ||||||||||||||||
1496 | inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store> | ||||||||||||||||
1497 | m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { | ||||||||||||||||
1498 | return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp, | ||||||||||||||||
1499 | PointerOp); | ||||||||||||||||
1500 | } | ||||||||||||||||
1501 | |||||||||||||||||
1502 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1503 | // Matchers for CastInst classes | ||||||||||||||||
1504 | // | ||||||||||||||||
1505 | |||||||||||||||||
1506 | template <typename Op_t, unsigned Opcode> struct CastClass_match { | ||||||||||||||||
1507 | Op_t Op; | ||||||||||||||||
1508 | |||||||||||||||||
1509 | CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {} | ||||||||||||||||
1510 | |||||||||||||||||
1511 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1512 | if (auto *O = dyn_cast<Operator>(V)) | ||||||||||||||||
1513 | return O->getOpcode() == Opcode && Op.match(O->getOperand(0)); | ||||||||||||||||
1514 | return false; | ||||||||||||||||
1515 | } | ||||||||||||||||
1516 | }; | ||||||||||||||||
1517 | |||||||||||||||||
1518 | /// Matches BitCast. | ||||||||||||||||
1519 | template <typename OpTy> | ||||||||||||||||
1520 | inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) { | ||||||||||||||||
1521 | return CastClass_match<OpTy, Instruction::BitCast>(Op); | ||||||||||||||||
1522 | } | ||||||||||||||||
1523 | |||||||||||||||||
1524 | /// Matches PtrToInt. | ||||||||||||||||
1525 | template <typename OpTy> | ||||||||||||||||
1526 | inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) { | ||||||||||||||||
1527 | return CastClass_match<OpTy, Instruction::PtrToInt>(Op); | ||||||||||||||||
1528 | } | ||||||||||||||||
1529 | |||||||||||||||||
1530 | /// Matches IntToPtr. | ||||||||||||||||
1531 | template <typename OpTy> | ||||||||||||||||
1532 | inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) { | ||||||||||||||||
1533 | return CastClass_match<OpTy, Instruction::IntToPtr>(Op); | ||||||||||||||||
1534 | } | ||||||||||||||||
1535 | |||||||||||||||||
1536 | /// Matches Trunc. | ||||||||||||||||
1537 | template <typename OpTy> | ||||||||||||||||
1538 | inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) { | ||||||||||||||||
1539 | return CastClass_match<OpTy, Instruction::Trunc>(Op); | ||||||||||||||||
1540 | } | ||||||||||||||||
1541 | |||||||||||||||||
1542 | template <typename OpTy> | ||||||||||||||||
1543 | inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy> | ||||||||||||||||
1544 | m_TruncOrSelf(const OpTy &Op) { | ||||||||||||||||
1545 | return m_CombineOr(m_Trunc(Op), Op); | ||||||||||||||||
1546 | } | ||||||||||||||||
1547 | |||||||||||||||||
1548 | /// Matches SExt. | ||||||||||||||||
1549 | template <typename OpTy> | ||||||||||||||||
1550 | inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) { | ||||||||||||||||
1551 | return CastClass_match<OpTy, Instruction::SExt>(Op); | ||||||||||||||||
1552 | } | ||||||||||||||||
1553 | |||||||||||||||||
1554 | /// Matches ZExt. | ||||||||||||||||
1555 | template <typename OpTy> | ||||||||||||||||
1556 | inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) { | ||||||||||||||||
1557 | return CastClass_match<OpTy, Instruction::ZExt>(Op); | ||||||||||||||||
1558 | } | ||||||||||||||||
1559 | |||||||||||||||||
1560 | template <typename OpTy> | ||||||||||||||||
1561 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy> | ||||||||||||||||
1562 | m_ZExtOrSelf(const OpTy &Op) { | ||||||||||||||||
1563 | return m_CombineOr(m_ZExt(Op), Op); | ||||||||||||||||
1564 | } | ||||||||||||||||
1565 | |||||||||||||||||
1566 | template <typename OpTy> | ||||||||||||||||
1567 | inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy> | ||||||||||||||||
1568 | m_SExtOrSelf(const OpTy &Op) { | ||||||||||||||||
1569 | return m_CombineOr(m_SExt(Op), Op); | ||||||||||||||||
1570 | } | ||||||||||||||||
1571 | |||||||||||||||||
1572 | template <typename OpTy> | ||||||||||||||||
1573 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, | ||||||||||||||||
1574 | CastClass_match<OpTy, Instruction::SExt>> | ||||||||||||||||
1575 | m_ZExtOrSExt(const OpTy &Op) { | ||||||||||||||||
1576 | return m_CombineOr(m_ZExt(Op), m_SExt(Op)); | ||||||||||||||||
1577 | } | ||||||||||||||||
1578 | |||||||||||||||||
1579 | template <typename OpTy> | ||||||||||||||||
1580 | inline match_combine_or< | ||||||||||||||||
1581 | match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, | ||||||||||||||||
1582 | CastClass_match<OpTy, Instruction::SExt>>, | ||||||||||||||||
1583 | OpTy> | ||||||||||||||||
1584 | m_ZExtOrSExtOrSelf(const OpTy &Op) { | ||||||||||||||||
1585 | return m_CombineOr(m_ZExtOrSExt(Op), Op); | ||||||||||||||||
1586 | } | ||||||||||||||||
1587 | |||||||||||||||||
1588 | template <typename OpTy> | ||||||||||||||||
1589 | inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) { | ||||||||||||||||
1590 | return CastClass_match<OpTy, Instruction::UIToFP>(Op); | ||||||||||||||||
1591 | } | ||||||||||||||||
1592 | |||||||||||||||||
1593 | template <typename OpTy> | ||||||||||||||||
1594 | inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) { | ||||||||||||||||
1595 | return CastClass_match<OpTy, Instruction::SIToFP>(Op); | ||||||||||||||||
1596 | } | ||||||||||||||||
1597 | |||||||||||||||||
1598 | template <typename OpTy> | ||||||||||||||||
1599 | inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) { | ||||||||||||||||
1600 | return CastClass_match<OpTy, Instruction::FPToUI>(Op); | ||||||||||||||||
1601 | } | ||||||||||||||||
1602 | |||||||||||||||||
1603 | template <typename OpTy> | ||||||||||||||||
1604 | inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) { | ||||||||||||||||
1605 | return CastClass_match<OpTy, Instruction::FPToSI>(Op); | ||||||||||||||||
1606 | } | ||||||||||||||||
1607 | |||||||||||||||||
1608 | template <typename OpTy> | ||||||||||||||||
1609 | inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) { | ||||||||||||||||
1610 | return CastClass_match<OpTy, Instruction::FPTrunc>(Op); | ||||||||||||||||
1611 | } | ||||||||||||||||
1612 | |||||||||||||||||
1613 | template <typename OpTy> | ||||||||||||||||
1614 | inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { | ||||||||||||||||
1615 | return CastClass_match<OpTy, Instruction::FPExt>(Op); | ||||||||||||||||
1616 | } | ||||||||||||||||
1617 | |||||||||||||||||
1618 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1619 | // Matchers for control flow. | ||||||||||||||||
1620 | // | ||||||||||||||||
1621 | |||||||||||||||||
1622 | struct br_match { | ||||||||||||||||
1623 | BasicBlock *&Succ; | ||||||||||||||||
1624 | |||||||||||||||||
1625 | br_match(BasicBlock *&Succ) : Succ(Succ) {} | ||||||||||||||||
1626 | |||||||||||||||||
1627 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1628 | if (auto *BI = dyn_cast<BranchInst>(V)) | ||||||||||||||||
1629 | if (BI->isUnconditional()) { | ||||||||||||||||
1630 | Succ = BI->getSuccessor(0); | ||||||||||||||||
1631 | return true; | ||||||||||||||||
1632 | } | ||||||||||||||||
1633 | return false; | ||||||||||||||||
1634 | } | ||||||||||||||||
1635 | }; | ||||||||||||||||
1636 | |||||||||||||||||
1637 | inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } | ||||||||||||||||
1638 | |||||||||||||||||
1639 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> | ||||||||||||||||
1640 | struct brc_match { | ||||||||||||||||
1641 | Cond_t Cond; | ||||||||||||||||
1642 | TrueBlock_t T; | ||||||||||||||||
1643 | FalseBlock_t F; | ||||||||||||||||
1644 | |||||||||||||||||
1645 | brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) | ||||||||||||||||
1646 | : Cond(C), T(t), F(f) {} | ||||||||||||||||
1647 | |||||||||||||||||
1648 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1649 | if (auto *BI = dyn_cast<BranchInst>(V)) | ||||||||||||||||
1650 | if (BI->isConditional() && Cond.match(BI->getCondition())) | ||||||||||||||||
1651 | return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); | ||||||||||||||||
1652 | return false; | ||||||||||||||||
1653 | } | ||||||||||||||||
1654 | }; | ||||||||||||||||
1655 | |||||||||||||||||
1656 | template <typename Cond_t> | ||||||||||||||||
1657 | inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>> | ||||||||||||||||
1658 | m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { | ||||||||||||||||
1659 | return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>( | ||||||||||||||||
1660 | C, m_BasicBlock(T), m_BasicBlock(F)); | ||||||||||||||||
1661 | } | ||||||||||||||||
1662 | |||||||||||||||||
1663 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> | ||||||||||||||||
1664 | inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t> | ||||||||||||||||
1665 | m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { | ||||||||||||||||
1666 | return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F); | ||||||||||||||||
1667 | } | ||||||||||||||||
1668 | |||||||||||||||||
1669 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1670 | // Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y). | ||||||||||||||||
1671 | // | ||||||||||||||||
1672 | |||||||||||||||||
1673 | template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t, | ||||||||||||||||
1674 | bool Commutable = false> | ||||||||||||||||
1675 | struct MaxMin_match { | ||||||||||||||||
1676 | LHS_t L; | ||||||||||||||||
1677 | RHS_t R; | ||||||||||||||||
1678 | |||||||||||||||||
1679 | // The evaluation order is always stable, regardless of Commutability. | ||||||||||||||||
1680 | // The LHS is always matched first. | ||||||||||||||||
1681 | MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||||||||||||||
1682 | |||||||||||||||||
1683 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1684 | if (auto *II = dyn_cast<IntrinsicInst>(V)) { | ||||||||||||||||
1685 | Intrinsic::ID IID = II->getIntrinsicID(); | ||||||||||||||||
1686 | if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) || | ||||||||||||||||
1687 | (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) || | ||||||||||||||||
1688 | (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) || | ||||||||||||||||
1689 | (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) { | ||||||||||||||||
1690 | Value *LHS = II->getOperand(0), *RHS = II->getOperand(1); | ||||||||||||||||
1691 | return (L.match(LHS) && R.match(RHS)) || | ||||||||||||||||
1692 | (Commutable && L.match(RHS) && R.match(LHS)); | ||||||||||||||||
1693 | } | ||||||||||||||||
1694 | } | ||||||||||||||||
1695 | // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x". | ||||||||||||||||
1696 | auto *SI = dyn_cast<SelectInst>(V); | ||||||||||||||||
1697 | if (!SI) | ||||||||||||||||
1698 | return false; | ||||||||||||||||
1699 | auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition()); | ||||||||||||||||
1700 | if (!Cmp) | ||||||||||||||||
1701 | return false; | ||||||||||||||||
1702 | // At this point we have a select conditioned on a comparison. Check that | ||||||||||||||||
1703 | // it is the values returned by the select that are being compared. | ||||||||||||||||
1704 | Value *TrueVal = SI->getTrueValue(); | ||||||||||||||||
1705 | Value *FalseVal = SI->getFalseValue(); | ||||||||||||||||
1706 | Value *LHS = Cmp->getOperand(0); | ||||||||||||||||
1707 | Value *RHS = Cmp->getOperand(1); | ||||||||||||||||
1708 | if ((TrueVal != LHS || FalseVal != RHS) && | ||||||||||||||||
1709 | (TrueVal != RHS || FalseVal != LHS)) | ||||||||||||||||
1710 | return false; | ||||||||||||||||
1711 | typename CmpInst_t::Predicate Pred = | ||||||||||||||||
1712 | LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); | ||||||||||||||||
1713 | // Does "(x pred y) ? x : y" represent the desired max/min operation? | ||||||||||||||||
1714 | if (!Pred_t::match(Pred)) | ||||||||||||||||
1715 | return false; | ||||||||||||||||
1716 | // It does! Bind the operands. | ||||||||||||||||
1717 | return (L.match(LHS) && R.match(RHS)) || | ||||||||||||||||
1718 | (Commutable && L.match(RHS) && R.match(LHS)); | ||||||||||||||||
1719 | } | ||||||||||||||||
1720 | }; | ||||||||||||||||
1721 | |||||||||||||||||
1722 | /// Helper class for identifying signed max predicates. | ||||||||||||||||
1723 | struct smax_pred_ty { | ||||||||||||||||
1724 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||||||
1725 | return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; | ||||||||||||||||
1726 | } | ||||||||||||||||
1727 | }; | ||||||||||||||||
1728 | |||||||||||||||||
1729 | /// Helper class for identifying signed min predicates. | ||||||||||||||||
1730 | struct smin_pred_ty { | ||||||||||||||||
1731 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||||||
1732 | return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE; | ||||||||||||||||
1733 | } | ||||||||||||||||
1734 | }; | ||||||||||||||||
1735 | |||||||||||||||||
1736 | /// Helper class for identifying unsigned max predicates. | ||||||||||||||||
1737 | struct umax_pred_ty { | ||||||||||||||||
1738 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||||||
1739 | return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE; | ||||||||||||||||
1740 | } | ||||||||||||||||
1741 | }; | ||||||||||||||||
1742 | |||||||||||||||||
1743 | /// Helper class for identifying unsigned min predicates. | ||||||||||||||||
1744 | struct umin_pred_ty { | ||||||||||||||||
1745 | static bool match(ICmpInst::Predicate Pred) { | ||||||||||||||||
1746 | return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE; | ||||||||||||||||
1747 | } | ||||||||||||||||
1748 | }; | ||||||||||||||||
1749 | |||||||||||||||||
1750 | /// Helper class for identifying ordered max predicates. | ||||||||||||||||
1751 | struct ofmax_pred_ty { | ||||||||||||||||
1752 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||||||
1753 | return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE; | ||||||||||||||||
1754 | } | ||||||||||||||||
1755 | }; | ||||||||||||||||
1756 | |||||||||||||||||
1757 | /// Helper class for identifying ordered min predicates. | ||||||||||||||||
1758 | struct ofmin_pred_ty { | ||||||||||||||||
1759 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||||||
1760 | return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE; | ||||||||||||||||
1761 | } | ||||||||||||||||
1762 | }; | ||||||||||||||||
1763 | |||||||||||||||||
1764 | /// Helper class for identifying unordered max predicates. | ||||||||||||||||
1765 | struct ufmax_pred_ty { | ||||||||||||||||
1766 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||||||
1767 | return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE; | ||||||||||||||||
1768 | } | ||||||||||||||||
1769 | }; | ||||||||||||||||
1770 | |||||||||||||||||
1771 | /// Helper class for identifying unordered min predicates. | ||||||||||||||||
1772 | struct ufmin_pred_ty { | ||||||||||||||||
1773 | static bool match(FCmpInst::Predicate Pred) { | ||||||||||||||||
1774 | return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE; | ||||||||||||||||
1775 | } | ||||||||||||||||
1776 | }; | ||||||||||||||||
1777 | |||||||||||||||||
1778 | template <typename LHS, typename RHS> | ||||||||||||||||
1779 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L, | ||||||||||||||||
1780 | const RHS &R) { | ||||||||||||||||
1781 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R); | ||||||||||||||||
1782 | } | ||||||||||||||||
1783 | |||||||||||||||||
1784 | template <typename LHS, typename RHS> | ||||||||||||||||
1785 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L, | ||||||||||||||||
1786 | const RHS &R) { | ||||||||||||||||
1787 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R); | ||||||||||||||||
1788 | } | ||||||||||||||||
1789 | |||||||||||||||||
1790 | template <typename LHS, typename RHS> | ||||||||||||||||
1791 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L, | ||||||||||||||||
1792 | const RHS &R) { | ||||||||||||||||
1793 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R); | ||||||||||||||||
1794 | } | ||||||||||||||||
1795 | |||||||||||||||||
1796 | template <typename LHS, typename RHS> | ||||||||||||||||
1797 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L, | ||||||||||||||||
1798 | const RHS &R) { | ||||||||||||||||
1799 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R); | ||||||||||||||||
1800 | } | ||||||||||||||||
1801 | |||||||||||||||||
1802 | template <typename LHS, typename RHS> | ||||||||||||||||
1803 | inline match_combine_or< | ||||||||||||||||
1804 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>, | ||||||||||||||||
1805 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>, | ||||||||||||||||
1806 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>, | ||||||||||||||||
1807 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>> | ||||||||||||||||
1808 | m_MaxOrMin(const LHS &L, const RHS &R) { | ||||||||||||||||
1809 | return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)), | ||||||||||||||||
1810 | m_CombineOr(m_UMax(L, R), m_UMin(L, R))); | ||||||||||||||||
1811 | } | ||||||||||||||||
1812 | |||||||||||||||||
1813 | /// Match an 'ordered' floating point maximum function. | ||||||||||||||||
1814 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||||||
1815 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||||||
1816 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' | ||||||||||||||||
1817 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||||||
1818 | /// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate. | ||||||||||||||||
1819 | /// | ||||||||||||||||
1820 | /// max(L, R) iff L and R are not NaN | ||||||||||||||||
1821 | /// m_OrdFMax(L, R) = R iff L or R are NaN | ||||||||||||||||
1822 | template <typename LHS, typename RHS> | ||||||||||||||||
1823 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L, | ||||||||||||||||
1824 | const RHS &R) { | ||||||||||||||||
1825 | return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R); | ||||||||||||||||
1826 | } | ||||||||||||||||
1827 | |||||||||||||||||
1828 | /// Match an 'ordered' floating point minimum function. | ||||||||||||||||
1829 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||||||
1830 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||||||
1831 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' | ||||||||||||||||
1832 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||||||
1833 | /// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate. | ||||||||||||||||
1834 | /// | ||||||||||||||||
1835 | /// min(L, R) iff L and R are not NaN | ||||||||||||||||
1836 | /// m_OrdFMin(L, R) = R iff L or R are NaN | ||||||||||||||||
1837 | template <typename LHS, typename RHS> | ||||||||||||||||
1838 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L, | ||||||||||||||||
1839 | const RHS &R) { | ||||||||||||||||
1840 | return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R); | ||||||||||||||||
1841 | } | ||||||||||||||||
1842 | |||||||||||||||||
1843 | /// Match an 'unordered' floating point maximum function. | ||||||||||||||||
1844 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||||||
1845 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||||||
1846 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' | ||||||||||||||||
1847 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||||||
1848 | /// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate. | ||||||||||||||||
1849 | /// | ||||||||||||||||
1850 | /// max(L, R) iff L and R are not NaN | ||||||||||||||||
1851 | /// m_UnordFMax(L, R) = L iff L or R are NaN | ||||||||||||||||
1852 | template <typename LHS, typename RHS> | ||||||||||||||||
1853 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty> | ||||||||||||||||
1854 | m_UnordFMax(const LHS &L, const RHS &R) { | ||||||||||||||||
1855 | return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R); | ||||||||||||||||
1856 | } | ||||||||||||||||
1857 | |||||||||||||||||
1858 | /// Match an 'unordered' floating point minimum function. | ||||||||||||||||
1859 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||||||||||||||
1860 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||||||||||||||
1861 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' | ||||||||||||||||
1862 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||||||||||||||
1863 | /// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. | ||||||||||||||||
1864 | /// | ||||||||||||||||
1865 | /// min(L, R) iff L and R are not NaN | ||||||||||||||||
1866 | /// m_UnordFMin(L, R) = L iff L or R are NaN | ||||||||||||||||
1867 | template <typename LHS, typename RHS> | ||||||||||||||||
1868 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty> | ||||||||||||||||
1869 | m_UnordFMin(const LHS &L, const RHS &R) { | ||||||||||||||||
1870 | return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R); | ||||||||||||||||
1871 | } | ||||||||||||||||
1872 | |||||||||||||||||
1873 | //===----------------------------------------------------------------------===// | ||||||||||||||||
1874 | // Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b | ||||||||||||||||
1875 | // Note that S might be matched to other instructions than AddInst. | ||||||||||||||||
1876 | // | ||||||||||||||||
1877 | |||||||||||||||||
1878 | template <typename LHS_t, typename RHS_t, typename Sum_t> | ||||||||||||||||
1879 | struct UAddWithOverflow_match { | ||||||||||||||||
1880 | LHS_t L; | ||||||||||||||||
1881 | RHS_t R; | ||||||||||||||||
1882 | Sum_t S; | ||||||||||||||||
1883 | |||||||||||||||||
1884 | UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S) | ||||||||||||||||
1885 | : L(L), R(R), S(S) {} | ||||||||||||||||
1886 | |||||||||||||||||
1887 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1888 | Value *ICmpLHS, *ICmpRHS; | ||||||||||||||||
1889 | ICmpInst::Predicate Pred; | ||||||||||||||||
1890 | if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V)) | ||||||||||||||||
1891 | return false; | ||||||||||||||||
1892 | |||||||||||||||||
1893 | Value *AddLHS, *AddRHS; | ||||||||||||||||
1894 | auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS)); | ||||||||||||||||
1895 | |||||||||||||||||
1896 | // (a + b) u< a, (a + b) u< b | ||||||||||||||||
1897 | if (Pred == ICmpInst::ICMP_ULT) | ||||||||||||||||
1898 | if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS)) | ||||||||||||||||
1899 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); | ||||||||||||||||
1900 | |||||||||||||||||
1901 | // a >u (a + b), b >u (a + b) | ||||||||||||||||
1902 | if (Pred == ICmpInst::ICMP_UGT) | ||||||||||||||||
1903 | if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS)) | ||||||||||||||||
1904 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); | ||||||||||||||||
1905 | |||||||||||||||||
1906 | Value *Op1; | ||||||||||||||||
1907 | auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes())); | ||||||||||||||||
1908 | // (a ^ -1) <u b | ||||||||||||||||
1909 | if (Pred == ICmpInst::ICMP_ULT) { | ||||||||||||||||
1910 | if (XorExpr.match(ICmpLHS)) | ||||||||||||||||
1911 | return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS); | ||||||||||||||||
1912 | } | ||||||||||||||||
1913 | // b > u (a ^ -1) | ||||||||||||||||
1914 | if (Pred == ICmpInst::ICMP_UGT) { | ||||||||||||||||
1915 | if (XorExpr.match(ICmpRHS)) | ||||||||||||||||
1916 | return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS); | ||||||||||||||||
1917 | } | ||||||||||||||||
1918 | |||||||||||||||||
1919 | // Match special-case for increment-by-1. | ||||||||||||||||
1920 | if (Pred == ICmpInst::ICMP_EQ) { | ||||||||||||||||
1921 | // (a + 1) == 0 | ||||||||||||||||
1922 | // (1 + a) == 0 | ||||||||||||||||
1923 | if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) && | ||||||||||||||||
1924 | (m_One().match(AddLHS) || m_One().match(AddRHS))) | ||||||||||||||||
1925 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); | ||||||||||||||||
1926 | // 0 == (a + 1) | ||||||||||||||||
1927 | // 0 == (1 + a) | ||||||||||||||||
1928 | if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) && | ||||||||||||||||
1929 | (m_One().match(AddLHS) || m_One().match(AddRHS))) | ||||||||||||||||
1930 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); | ||||||||||||||||
1931 | } | ||||||||||||||||
1932 | |||||||||||||||||
1933 | return false; | ||||||||||||||||
1934 | } | ||||||||||||||||
1935 | }; | ||||||||||||||||
1936 | |||||||||||||||||
1937 | /// Match an icmp instruction checking for unsigned overflow on addition. | ||||||||||||||||
1938 | /// | ||||||||||||||||
1939 | /// S is matched to the addition whose result is being checked for overflow, and | ||||||||||||||||
1940 | /// L and R are matched to the LHS and RHS of S. | ||||||||||||||||
1941 | template <typename LHS_t, typename RHS_t, typename Sum_t> | ||||||||||||||||
1942 | UAddWithOverflow_match<LHS_t, RHS_t, Sum_t> | ||||||||||||||||
1943 | m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { | ||||||||||||||||
1944 | return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S); | ||||||||||||||||
1945 | } | ||||||||||||||||
1946 | |||||||||||||||||
1947 | template <typename Opnd_t> struct Argument_match { | ||||||||||||||||
1948 | unsigned OpI; | ||||||||||||||||
1949 | Opnd_t Val; | ||||||||||||||||
1950 | |||||||||||||||||
1951 | Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {} | ||||||||||||||||
1952 | |||||||||||||||||
1953 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1954 | // FIXME: Should likely be switched to use `CallBase`. | ||||||||||||||||
1955 | if (const auto *CI = dyn_cast<CallInst>(V)) | ||||||||||||||||
1956 | return Val.match(CI->getArgOperand(OpI)); | ||||||||||||||||
1957 | return false; | ||||||||||||||||
1958 | } | ||||||||||||||||
1959 | }; | ||||||||||||||||
1960 | |||||||||||||||||
1961 | /// Match an argument. | ||||||||||||||||
1962 | template <unsigned OpI, typename Opnd_t> | ||||||||||||||||
1963 | inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) { | ||||||||||||||||
1964 | return Argument_match<Opnd_t>(OpI, Op); | ||||||||||||||||
1965 | } | ||||||||||||||||
1966 | |||||||||||||||||
1967 | /// Intrinsic matchers. | ||||||||||||||||
1968 | struct IntrinsicID_match { | ||||||||||||||||
1969 | unsigned ID; | ||||||||||||||||
1970 | |||||||||||||||||
1971 | IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {} | ||||||||||||||||
1972 | |||||||||||||||||
1973 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
1974 | if (const auto *CI
| ||||||||||||||||
1975 | if (const auto *F = CI->getCalledFunction()) | ||||||||||||||||
1976 | return F->getIntrinsicID() == ID; | ||||||||||||||||
1977 | return false; | ||||||||||||||||
1978 | } | ||||||||||||||||
1979 | }; | ||||||||||||||||
1980 | |||||||||||||||||
1981 | /// Intrinsic matches are combinations of ID matchers, and argument | ||||||||||||||||
1982 | /// matchers. Higher arity matcher are defined recursively in terms of and-ing | ||||||||||||||||
1983 | /// them with lower arity matchers. Here's some convenient typedefs for up to | ||||||||||||||||
1984 | /// several arguments, and more can be added as needed | ||||||||||||||||
1985 | template <typename T0 = void, typename T1 = void, typename T2 = void, | ||||||||||||||||
1986 | typename T3 = void, typename T4 = void, typename T5 = void, | ||||||||||||||||
1987 | typename T6 = void, typename T7 = void, typename T8 = void, | ||||||||||||||||
1988 | typename T9 = void, typename T10 = void> | ||||||||||||||||
1989 | struct m_Intrinsic_Ty; | ||||||||||||||||
1990 | template <typename T0> struct m_Intrinsic_Ty<T0> { | ||||||||||||||||
1991 | using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>; | ||||||||||||||||
1992 | }; | ||||||||||||||||
1993 | template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> { | ||||||||||||||||
1994 | using Ty = | ||||||||||||||||
1995 | match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>; | ||||||||||||||||
1996 | }; | ||||||||||||||||
1997 | template <typename T0, typename T1, typename T2> | ||||||||||||||||
1998 | struct m_Intrinsic_Ty<T0, T1, T2> { | ||||||||||||||||
1999 | using Ty = | ||||||||||||||||
2000 | match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty, | ||||||||||||||||
2001 | Argument_match<T2>>; | ||||||||||||||||
2002 | }; | ||||||||||||||||
2003 | template <typename T0, typename T1, typename T2, typename T3> | ||||||||||||||||
2004 | struct m_Intrinsic_Ty<T0, T1, T2, T3> { | ||||||||||||||||
2005 | using Ty = | ||||||||||||||||
2006 | match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty, | ||||||||||||||||
2007 | Argument_match<T3>>; | ||||||||||||||||
2008 | }; | ||||||||||||||||
2009 | |||||||||||||||||
2010 | template <typename T0, typename T1, typename T2, typename T3, typename T4> | ||||||||||||||||
2011 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> { | ||||||||||||||||
2012 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty, | ||||||||||||||||
2013 | Argument_match<T4>>; | ||||||||||||||||
2014 | }; | ||||||||||||||||
2015 | |||||||||||||||||
2016 | template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> | ||||||||||||||||
2017 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> { | ||||||||||||||||
2018 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty, | ||||||||||||||||
2019 | Argument_match<T5>>; | ||||||||||||||||
2020 | }; | ||||||||||||||||
2021 | |||||||||||||||||
2022 | /// Match intrinsic calls like this: | ||||||||||||||||
2023 | /// m_Intrinsic<Intrinsic::fabs>(m_Value(X)) | ||||||||||||||||
2024 | template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() { | ||||||||||||||||
2025 | return IntrinsicID_match(IntrID); | ||||||||||||||||
2026 | } | ||||||||||||||||
2027 | |||||||||||||||||
2028 | template <Intrinsic::ID IntrID, typename T0> | ||||||||||||||||
2029 | inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) { | ||||||||||||||||
2030 | return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0)); | ||||||||||||||||
2031 | } | ||||||||||||||||
2032 | |||||||||||||||||
2033 | template <Intrinsic::ID IntrID, typename T0, typename T1> | ||||||||||||||||
2034 | inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0, | ||||||||||||||||
2035 | const T1 &Op1) { | ||||||||||||||||
2036 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1)); | ||||||||||||||||
2037 | } | ||||||||||||||||
2038 | |||||||||||||||||
2039 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2> | ||||||||||||||||
2040 | inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty | ||||||||||||||||
2041 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) { | ||||||||||||||||
2042 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2)); | ||||||||||||||||
2043 | } | ||||||||||||||||
2044 | |||||||||||||||||
2045 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||||||||||||||
2046 | typename T3> | ||||||||||||||||
2047 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty | ||||||||||||||||
2048 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { | ||||||||||||||||
2049 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3)); | ||||||||||||||||
2050 | } | ||||||||||||||||
2051 | |||||||||||||||||
2052 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||||||||||||||
2053 | typename T3, typename T4> | ||||||||||||||||
2054 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty | ||||||||||||||||
2055 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, | ||||||||||||||||
2056 | const T4 &Op4) { | ||||||||||||||||
2057 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3), | ||||||||||||||||
2058 | m_Argument<4>(Op4)); | ||||||||||||||||
2059 | } | ||||||||||||||||
2060 | |||||||||||||||||
2061 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||||||||||||||
2062 | typename T3, typename T4, typename T5> | ||||||||||||||||
2063 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5>::Ty | ||||||||||||||||
2064 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, | ||||||||||||||||
2065 | const T4 &Op4, const T5 &Op5) { | ||||||||||||||||
2066 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3, Op4), | ||||||||||||||||
2067 | m_Argument<5>(Op5)); | ||||||||||||||||
2068 | } | ||||||||||||||||
2069 | |||||||||||||||||
2070 | // Helper intrinsic matching specializations. | ||||||||||||||||
2071 | template <typename Opnd0> | ||||||||||||||||
2072 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) { | ||||||||||||||||
2073 | return m_Intrinsic<Intrinsic::bitreverse>(Op0); | ||||||||||||||||
2074 | } | ||||||||||||||||
2075 | |||||||||||||||||
2076 | template <typename Opnd0> | ||||||||||||||||
2077 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) { | ||||||||||||||||
2078 | return m_Intrinsic<Intrinsic::bswap>(Op0); | ||||||||||||||||
2079 | } | ||||||||||||||||
2080 | |||||||||||||||||
2081 | template <typename Opnd0> | ||||||||||||||||
2082 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) { | ||||||||||||||||
2083 | return m_Intrinsic<Intrinsic::fabs>(Op0); | ||||||||||||||||
2084 | } | ||||||||||||||||
2085 | |||||||||||||||||
2086 | template <typename Opnd0> | ||||||||||||||||
2087 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) { | ||||||||||||||||
2088 | return m_Intrinsic<Intrinsic::canonicalize>(Op0); | ||||||||||||||||
2089 | } | ||||||||||||||||
2090 | |||||||||||||||||
2091 | template <typename Opnd0, typename Opnd1> | ||||||||||||||||
2092 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0, | ||||||||||||||||
2093 | const Opnd1 &Op1) { | ||||||||||||||||
2094 | return m_Intrinsic<Intrinsic::minnum>(Op0, Op1); | ||||||||||||||||
2095 | } | ||||||||||||||||
2096 | |||||||||||||||||
2097 | template <typename Opnd0, typename Opnd1> | ||||||||||||||||
2098 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0, | ||||||||||||||||
2099 | const Opnd1 &Op1) { | ||||||||||||||||
2100 | return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1); | ||||||||||||||||
2101 | } | ||||||||||||||||
2102 | |||||||||||||||||
2103 | template <typename Opnd0, typename Opnd1, typename Opnd2> | ||||||||||||||||
2104 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty | ||||||||||||||||
2105 | m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { | ||||||||||||||||
2106 | return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2); | ||||||||||||||||
2107 | } | ||||||||||||||||
2108 | |||||||||||||||||
2109 | template <typename Opnd0, typename Opnd1, typename Opnd2> | ||||||||||||||||
2110 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty | ||||||||||||||||
2111 | m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { | ||||||||||||||||
2112 | return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2); | ||||||||||||||||
2113 | } | ||||||||||||||||
2114 | |||||||||||||||||
2115 | //===----------------------------------------------------------------------===// | ||||||||||||||||
2116 | // Matchers for two-operands operators with the operators in either order | ||||||||||||||||
2117 | // | ||||||||||||||||
2118 | |||||||||||||||||
2119 | /// Matches a BinaryOperator with LHS and RHS in either order. | ||||||||||||||||
2120 | template <typename LHS, typename RHS> | ||||||||||||||||
2121 | inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { | ||||||||||||||||
2122 | return AnyBinaryOp_match<LHS, RHS, true>(L, R); | ||||||||||||||||
2123 | } | ||||||||||||||||
2124 | |||||||||||||||||
2125 | /// Matches an ICmp with a predicate over LHS and RHS in either order. | ||||||||||||||||
2126 | /// Swaps the predicate if operands are commuted. | ||||||||||||||||
2127 | template <typename LHS, typename RHS> | ||||||||||||||||
2128 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> | ||||||||||||||||
2129 | m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||||||||||||||
2130 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L, | ||||||||||||||||
2131 | R); | ||||||||||||||||
2132 | } | ||||||||||||||||
2133 | |||||||||||||||||
2134 | /// Matches a Add with LHS and RHS in either order. | ||||||||||||||||
2135 | template <typename LHS, typename RHS> | ||||||||||||||||
2136 | inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L, | ||||||||||||||||
2137 | const RHS &R) { | ||||||||||||||||
2138 | return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R); | ||||||||||||||||
2139 | } | ||||||||||||||||
2140 | |||||||||||||||||
2141 | /// Matches a Mul with LHS and RHS in either order. | ||||||||||||||||
2142 | template <typename LHS, typename RHS> | ||||||||||||||||
2143 | inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L, | ||||||||||||||||
2144 | const RHS &R) { | ||||||||||||||||
2145 | return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R); | ||||||||||||||||
2146 | } | ||||||||||||||||
2147 | |||||||||||||||||
2148 | /// Matches an And with LHS and RHS in either order. | ||||||||||||||||
2149 | template <typename LHS, typename RHS> | ||||||||||||||||
2150 | inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L, | ||||||||||||||||
2151 | const RHS &R) { | ||||||||||||||||
2152 | return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R); | ||||||||||||||||
2153 | } | ||||||||||||||||
2154 | |||||||||||||||||
2155 | /// Matches an Or with LHS and RHS in either order. | ||||||||||||||||
2156 | template <typename LHS, typename RHS> | ||||||||||||||||
2157 | inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L, | ||||||||||||||||
2158 | const RHS &R) { | ||||||||||||||||
2159 | return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R); | ||||||||||||||||
2160 | } | ||||||||||||||||
2161 | |||||||||||||||||
2162 | /// Matches an Xor with LHS and RHS in either order. | ||||||||||||||||
2163 | template <typename LHS, typename RHS> | ||||||||||||||||
2164 | inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L, | ||||||||||||||||
2165 | const RHS &R) { | ||||||||||||||||
2166 | return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R); | ||||||||||||||||
2167 | } | ||||||||||||||||
2168 | |||||||||||||||||
2169 | /// Matches a 'Neg' as 'sub 0, V'. | ||||||||||||||||
2170 | template <typename ValTy> | ||||||||||||||||
2171 | inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub> | ||||||||||||||||
2172 | m_Neg(const ValTy &V) { | ||||||||||||||||
2173 | return m_Sub(m_ZeroInt(), V); | ||||||||||||||||
2174 | } | ||||||||||||||||
2175 | |||||||||||||||||
2176 | /// Matches a 'Neg' as 'sub nsw 0, V'. | ||||||||||||||||
2177 | template <typename ValTy> | ||||||||||||||||
2178 | inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, | ||||||||||||||||
2179 | Instruction::Sub, | ||||||||||||||||
2180 | OverflowingBinaryOperator::NoSignedWrap> | ||||||||||||||||
2181 | m_NSWNeg(const ValTy &V) { | ||||||||||||||||
2182 | return m_NSWSub(m_ZeroInt(), V); | ||||||||||||||||
2183 | } | ||||||||||||||||
2184 | |||||||||||||||||
2185 | /// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'. | ||||||||||||||||
2186 | template <typename ValTy> | ||||||||||||||||
2187 | inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true> | ||||||||||||||||
2188 | m_Not(const ValTy &V) { | ||||||||||||||||
2189 | return m_c_Xor(V, m_AllOnes()); | ||||||||||||||||
2190 | } | ||||||||||||||||
2191 | |||||||||||||||||
2192 | /// Matches an SMin with LHS and RHS in either order. | ||||||||||||||||
2193 | template <typename LHS, typename RHS> | ||||||||||||||||
2194 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> | ||||||||||||||||
2195 | m_c_SMin(const LHS &L, const RHS &R) { | ||||||||||||||||
2196 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R); | ||||||||||||||||
2197 | } | ||||||||||||||||
2198 | /// Matches an SMax with LHS and RHS in either order. | ||||||||||||||||
2199 | template <typename LHS, typename RHS> | ||||||||||||||||
2200 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true> | ||||||||||||||||
2201 | m_c_SMax(const LHS &L, const RHS &R) { | ||||||||||||||||
2202 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R); | ||||||||||||||||
2203 | } | ||||||||||||||||
2204 | /// Matches a UMin with LHS and RHS in either order. | ||||||||||||||||
2205 | template <typename LHS, typename RHS> | ||||||||||||||||
2206 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true> | ||||||||||||||||
2207 | m_c_UMin(const LHS &L, const RHS &R) { | ||||||||||||||||
2208 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R); | ||||||||||||||||
2209 | } | ||||||||||||||||
2210 | /// Matches a UMax with LHS and RHS in either order. | ||||||||||||||||
2211 | template <typename LHS, typename RHS> | ||||||||||||||||
2212 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true> | ||||||||||||||||
2213 | m_c_UMax(const LHS &L, const RHS &R) { | ||||||||||||||||
2214 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R); | ||||||||||||||||
2215 | } | ||||||||||||||||
2216 | |||||||||||||||||
2217 | template <typename LHS, typename RHS> | ||||||||||||||||
2218 | inline match_combine_or< | ||||||||||||||||
2219 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>, | ||||||||||||||||
2220 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>, | ||||||||||||||||
2221 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>, | ||||||||||||||||
2222 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>> | ||||||||||||||||
2223 | m_c_MaxOrMin(const LHS &L, const RHS &R) { | ||||||||||||||||
2224 | return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)), | ||||||||||||||||
2225 | m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R))); | ||||||||||||||||
2226 | } | ||||||||||||||||
2227 | |||||||||||||||||
2228 | /// Matches FAdd with LHS and RHS in either order. | ||||||||||||||||
2229 | template <typename LHS, typename RHS> | ||||||||||||||||
2230 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true> | ||||||||||||||||
2231 | m_c_FAdd(const LHS &L, const RHS &R) { | ||||||||||||||||
2232 | return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R); | ||||||||||||||||
2233 | } | ||||||||||||||||
2234 | |||||||||||||||||
2235 | /// Matches FMul with LHS and RHS in either order. | ||||||||||||||||
2236 | template <typename LHS, typename RHS> | ||||||||||||||||
2237 | inline BinaryOp_match<LHS, RHS, Instruction::FMul, true> | ||||||||||||||||
2238 | m_c_FMul(const LHS &L, const RHS &R) { | ||||||||||||||||
2239 | return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R); | ||||||||||||||||
2240 | } | ||||||||||||||||
2241 | |||||||||||||||||
2242 | template <typename Opnd_t> struct Signum_match { | ||||||||||||||||
2243 | Opnd_t Val; | ||||||||||||||||
2244 | Signum_match(const Opnd_t &V) : Val(V) {} | ||||||||||||||||
2245 | |||||||||||||||||
2246 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
2247 | unsigned TypeSize = V->getType()->getScalarSizeInBits(); | ||||||||||||||||
2248 | if (TypeSize == 0) | ||||||||||||||||
2249 | return false; | ||||||||||||||||
2250 | |||||||||||||||||
2251 | unsigned ShiftWidth = TypeSize - 1; | ||||||||||||||||
2252 | Value *OpL = nullptr, *OpR = nullptr; | ||||||||||||||||
2253 | |||||||||||||||||
2254 | // This is the representation of signum we match: | ||||||||||||||||
2255 | // | ||||||||||||||||
2256 | // signum(x) == (x >> 63) | (-x >>u 63) | ||||||||||||||||
2257 | // | ||||||||||||||||
2258 | // An i1 value is its own signum, so it's correct to match | ||||||||||||||||
2259 | // | ||||||||||||||||
2260 | // signum(x) == (x >> 0) | (-x >>u 0) | ||||||||||||||||
2261 | // | ||||||||||||||||
2262 | // for i1 values. | ||||||||||||||||
2263 | |||||||||||||||||
2264 | auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); | ||||||||||||||||
2265 | auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); | ||||||||||||||||
2266 | auto Signum = m_Or(LHS, RHS); | ||||||||||||||||
2267 | |||||||||||||||||
2268 | return Signum.match(V) && OpL == OpR && Val.match(OpL); | ||||||||||||||||
2269 | } | ||||||||||||||||
2270 | }; | ||||||||||||||||
2271 | |||||||||||||||||
2272 | /// Matches a signum pattern. | ||||||||||||||||
2273 | /// | ||||||||||||||||
2274 | /// signum(x) = | ||||||||||||||||
2275 | /// x > 0 -> 1 | ||||||||||||||||
2276 | /// x == 0 -> 0 | ||||||||||||||||
2277 | /// x < 0 -> -1 | ||||||||||||||||
2278 | template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) { | ||||||||||||||||
2279 | return Signum_match<Val_t>(V); | ||||||||||||||||
2280 | } | ||||||||||||||||
2281 | |||||||||||||||||
2282 | template <int Ind, typename Opnd_t> struct ExtractValue_match { | ||||||||||||||||
2283 | Opnd_t Val; | ||||||||||||||||
2284 | ExtractValue_match(const Opnd_t &V) : Val(V) {} | ||||||||||||||||
2285 | |||||||||||||||||
2286 | template <typename OpTy> bool match(OpTy *V) { | ||||||||||||||||
2287 | if (auto *I = dyn_cast<ExtractValueInst>(V)) | ||||||||||||||||
2288 | return I->getNumIndices() == 1 && I->getIndices()[0] == Ind && | ||||||||||||||||
2289 | Val.match(I->getAggregateOperand()); | ||||||||||||||||
2290 | return false; | ||||||||||||||||
2291 | } | ||||||||||||||||
2292 | }; | ||||||||||||||||
2293 | |||||||||||||||||
2294 | /// Match a single index ExtractValue instruction. | ||||||||||||||||
2295 | /// For example m_ExtractValue<1>(...) | ||||||||||||||||
2296 | template <int Ind, typename Val_t> | ||||||||||||||||
2297 | inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) { | ||||||||||||||||
2298 | return ExtractValue_match<Ind, Val_t>(V); | ||||||||||||||||
2299 | } | ||||||||||||||||
2300 | |||||||||||||||||
2301 | /// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or | ||||||||||||||||
2302 | /// the constant expression | ||||||||||||||||
2303 | /// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1>` | ||||||||||||||||
2304 | /// under the right conditions determined by DataLayout. | ||||||||||||||||
2305 | struct VScaleVal_match { | ||||||||||||||||
2306 | private: | ||||||||||||||||
2307 | template <typename Base, typename Offset> | ||||||||||||||||
2308 | inline BinaryOp_match<Base, Offset, Instruction::GetElementPtr> | ||||||||||||||||
2309 | m_OffsetGep(const Base &B, const Offset &O) { | ||||||||||||||||
2310 | return BinaryOp_match<Base, Offset, Instruction::GetElementPtr>(B, O); | ||||||||||||||||
2311 | } | ||||||||||||||||
2312 | |||||||||||||||||
2313 | public: | ||||||||||||||||
2314 | const DataLayout &DL; | ||||||||||||||||
2315 | VScaleVal_match(const DataLayout &DL) : DL(DL) {} | ||||||||||||||||
2316 | |||||||||||||||||
2317 | template <typename ITy> bool match(ITy *V) { | ||||||||||||||||
2318 | if (m_Intrinsic<Intrinsic::vscale>().match(V)) | ||||||||||||||||
2319 | return true; | ||||||||||||||||
2320 | |||||||||||||||||
2321 | if (m_PtrToInt(m_OffsetGep(m_Zero(), m_SpecificInt(1))).match(V)) { | ||||||||||||||||
2322 | Type *PtrTy = cast<Operator>(V)->getOperand(0)->getType(); | ||||||||||||||||
2323 | auto *DerefTy = PtrTy->getPointerElementType(); | ||||||||||||||||
2324 | if (isa<ScalableVectorType>(DerefTy) && | ||||||||||||||||
2325 | DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8) | ||||||||||||||||
2326 | return true; | ||||||||||||||||
2327 | } | ||||||||||||||||
2328 | |||||||||||||||||
2329 | return false; | ||||||||||||||||
2330 | } | ||||||||||||||||
2331 | }; | ||||||||||||||||
2332 | |||||||||||||||||
2333 | inline VScaleVal_match m_VScale(const DataLayout &DL) { | ||||||||||||||||
2334 | return VScaleVal_match(DL); | ||||||||||||||||
2335 | } | ||||||||||||||||
2336 | |||||||||||||||||
2337 | } // end namespace PatternMatch | ||||||||||||||||
2338 | } // end namespace llvm | ||||||||||||||||
2339 | |||||||||||||||||
2340 | #endif // LLVM_IR_PATTERNMATCH_H |
1 | //===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the generic AliasAnalysis interface, which is used as the |
10 | // common interface used by all clients of alias analysis information, and |
11 | // implemented by all alias analysis implementations. Mod/Ref information is |
12 | // also captured by this interface. |
13 | // |
14 | // Implementations of this interface must implement the various virtual methods, |
15 | // which automatically provides functionality for the entire suite of client |
16 | // APIs. |
17 | // |
18 | // This API identifies memory regions with the MemoryLocation class. The pointer |
19 | // component specifies the base memory address of the region. The Size specifies |
20 | // the maximum size (in address units) of the memory region, or |
21 | // MemoryLocation::UnknownSize if the size is not known. The TBAA tag |
22 | // identifies the "type" of the memory reference; see the |
23 | // TypeBasedAliasAnalysis class for details. |
24 | // |
25 | // Some non-obvious details include: |
26 | // - Pointers that point to two completely different objects in memory never |
27 | // alias, regardless of the value of the Size component. |
28 | // - NoAlias doesn't imply inequal pointers. The most obvious example of this |
29 | // is two pointers to constant memory. Even if they are equal, constant |
30 | // memory is never stored to, so there will never be any dependencies. |
31 | // In this and other situations, the pointers may be both NoAlias and |
32 | // MustAlias at the same time. The current API can only return one result, |
33 | // though this is rarely a problem in practice. |
34 | // |
35 | //===----------------------------------------------------------------------===// |
36 | |
37 | #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H |
38 | #define LLVM_ANALYSIS_ALIASANALYSIS_H |
39 | |
40 | #include "llvm/ADT/DenseMap.h" |
41 | #include "llvm/ADT/None.h" |
42 | #include "llvm/ADT/Optional.h" |
43 | #include "llvm/ADT/SmallVector.h" |
44 | #include "llvm/Analysis/MemoryLocation.h" |
45 | #include "llvm/Analysis/TargetLibraryInfo.h" |
46 | #include "llvm/IR/Function.h" |
47 | #include "llvm/IR/Instruction.h" |
48 | #include "llvm/IR/Instructions.h" |
49 | #include "llvm/IR/PassManager.h" |
50 | #include "llvm/Pass.h" |
51 | #include <cstdint> |
52 | #include <functional> |
53 | #include <memory> |
54 | #include <vector> |
55 | |
56 | namespace llvm { |
57 | |
58 | class AnalysisUsage; |
59 | class BasicAAResult; |
60 | class BasicBlock; |
61 | class DominatorTree; |
62 | class Value; |
63 | |
/// The possible results of an alias query.
///
/// These results are always computed between two MemoryLocation objects as
/// a query to some alias analysis.
///
/// This is deliberately an unscoped enumeration: a result can then be tested
/// implicitly for the existence of any possible aliasing through a conversion
/// to bool, which an "enum class" does not support. The canonical names from
/// the literature are suffixed and unique anyway, so they serve as global
/// constants in LLVM for these results.
///
/// See docs/AliasAnalysis.html for more information on the specific meanings
/// of these values.
enum AliasResult : uint8_t {
  /// The two locations do not alias at all.
  ///
  /// This is the only value that converts to false; every other value
  /// converts to true, so a boolean context yields a flag saying whether
  /// aliasing is possible.
  NoAlias = 0,
  /// The two locations may or may not alias. This is the least precise result.
  MayAlias = 1,
  /// The two locations alias, but only due to a partial overlap.
  PartialAlias = 2,
  /// The two locations precisely alias each other.
  MustAlias = 3,
};
91 | |
92 | /// << operator for AliasResult. |
93 | raw_ostream &operator<<(raw_ostream &OS, AliasResult AR); |
94 | |
95 | /// Flags indicating whether a memory access modifies or references memory. |
96 | /// |
97 | /// This is no access at all, a modification, a reference, or both |
98 | /// a modification and a reference. These are specifically structured such that |
99 | /// they form a three bit matrix and bit-tests for 'mod' or 'ref' or 'must' |
100 | /// work with any of the possible values. |
101 | enum class ModRefInfo : uint8_t { |
102 | /// Must is provided for completeness, but no routines will return only |
103 | /// Must today. See definition of Must below. |
104 | Must = 0, |
105 | /// The access may reference the value stored in memory, |
106 | /// a mustAlias relation was found, and no mayAlias or partialAlias found. |
107 | MustRef = 1, |
108 | /// The access may modify the value stored in memory, |
109 | /// a mustAlias relation was found, and no mayAlias or partialAlias found. |
110 | MustMod = 2, |
111 | /// The access may reference, modify or both the value stored in memory, |
112 | /// a mustAlias relation was found, and no mayAlias or partialAlias found. |
113 | MustModRef = MustRef | MustMod, |
114 | /// The access neither references nor modifies the value stored in memory. |
115 | NoModRef = 4, |
116 | /// The access may reference the value stored in memory. |
117 | Ref = NoModRef | MustRef, |
118 | /// The access may modify the value stored in memory. |
119 | Mod = NoModRef | MustMod, |
120 | /// The access may reference and may modify the value stored in memory. |
121 | ModRef = Ref | Mod, |
122 | |
123 | /// About Must: |
124 | /// Must is set in a best effort manner. |
125 | /// We usually do not try our best to infer Must, instead it is merely |
126 | /// another piece of "free" information that is presented when available. |
127 | /// Must set means there was certainly a MustAlias found. For calls, |
128 | /// where multiple arguments are checked (argmemonly), this translates to |
129 | /// only MustAlias or NoAlias was found. |
130 | /// Must is not set for RAR accesses, even if the two locations must |
131 | /// alias. The reason is that two read accesses translate to an early return |
132 | /// of NoModRef. An additional alias check to set Must may be |
133 | /// expensive. Other cases may also not set Must (e.g. callCapturesBefore). |
134 | /// We refer to Must being *set* when the most significant bit (NoModRef, 4) is *cleared*. |
135 | /// Conversely we *clear* Must information by *setting* that bit to 1. |
136 | }; |
137 | |
138 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isNoModRef(const ModRefInfo MRI) { |
139 | return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) == |
140 | static_cast<int>(ModRefInfo::Must); |
141 | } |
142 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isModOrRefSet(const ModRefInfo MRI) { |
143 | return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef); |
144 | } |
145 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isModAndRefSet(const ModRefInfo MRI) { |
146 | return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) == |
147 | static_cast<int>(ModRefInfo::MustModRef); |
148 | } |
149 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isModSet(const ModRefInfo MRI) { |
150 | return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustMod); |
151 | } |
152 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isRefSet(const ModRefInfo MRI) { |
153 | return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustRef); |
154 | } |
155 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isMustSet(const ModRefInfo MRI) { |
156 | return !(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::NoModRef)); |
157 | } |
158 | |
159 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setMod(const ModRefInfo MRI) { |
160 | return ModRefInfo(static_cast<int>(MRI) | |
161 | static_cast<int>(ModRefInfo::MustMod)); |
162 | } |
163 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setRef(const ModRefInfo MRI) { |
164 | return ModRefInfo(static_cast<int>(MRI) | |
165 | static_cast<int>(ModRefInfo::MustRef)); |
166 | } |
167 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setMust(const ModRefInfo MRI) { |
168 | return ModRefInfo(static_cast<int>(MRI) & |
169 | static_cast<int>(ModRefInfo::MustModRef)); |
170 | } |
171 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo setModAndRef(const ModRefInfo MRI) { |
172 | return ModRefInfo(static_cast<int>(MRI) | |
173 | static_cast<int>(ModRefInfo::MustModRef)); |
174 | } |
175 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo clearMod(const ModRefInfo MRI) { |
176 | return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref)); |
177 | } |
178 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo clearRef(const ModRefInfo MRI) { |
179 | return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod)); |
180 | } |
181 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo clearMust(const ModRefInfo MRI) { |
182 | return ModRefInfo(static_cast<int>(MRI) | |
183 | static_cast<int>(ModRefInfo::NoModRef)); |
184 | } |
185 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo unionModRef(const ModRefInfo MRI1, |
186 | const ModRefInfo MRI2) { |
187 | return ModRefInfo(static_cast<int>(MRI1) | static_cast<int>(MRI2)); |
188 | } |
189 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo intersectModRef(const ModRefInfo MRI1, |
190 | const ModRefInfo MRI2) { |
191 | return ModRefInfo(static_cast<int>(MRI1) & static_cast<int>(MRI2)); |
192 | } |
193 | |
194 | /// The locations at which a function might access memory. |
195 | /// |
196 | /// These are primarily used in conjunction with the \c AccessKind bits to |
197 | /// describe both the nature of access and the locations of access for a |
198 | /// function call. |
199 | enum FunctionModRefLocation { |
200 | /// Base case is no access to memory. |
201 | FMRL_Nowhere = 0, |
202 | /// Access to memory via argument pointers. |
203 | FMRL_ArgumentPointees = 8, |
204 | /// Memory that is inaccessible via LLVM IR. |
205 | FMRL_InaccessibleMem = 16, |
206 | /// Access to any memory. |
207 | FMRL_Anywhere = 32 | FMRL_InaccessibleMem | FMRL_ArgumentPointees |
208 | }; |
209 | |
210 | /// Summary of how a function affects memory in the program. |
211 | /// |
212 | /// Loads from constant globals are not considered memory accesses for this |
213 | /// interface. Also, functions may freely modify stack space local to their |
214 | /// invocation without having to report it through these interfaces. |
215 | enum FunctionModRefBehavior { |
216 | /// This function does not perform any non-local loads or stores to memory. |
217 | /// |
218 | /// This property corresponds to the GCC 'const' attribute. |
219 | /// This property corresponds to the LLVM IR 'readnone' attribute. |
220 | /// This property corresponds to the IntrNoMem LLVM intrinsic flag. |
221 | FMRB_DoesNotAccessMemory = |
222 | FMRL_Nowhere | static_cast<int>(ModRefInfo::NoModRef), |
223 | |
224 | /// The only memory references in this function (if it has any) are |
225 | /// non-volatile loads from objects pointed to by its pointer-typed |
226 | /// arguments, with arbitrary offsets. |
227 | /// |
228 | /// This property corresponds to the combination of the IntrReadMem |
229 | /// and IntrArgMemOnly LLVM intrinsic flags. |
230 | FMRB_OnlyReadsArgumentPointees = |
231 | FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::Ref), |
232 | |
233 | /// The only memory references in this function (if it has any) are |
234 | /// non-volatile stores to objects pointed to by its pointer-typed |
235 | /// arguments, with arbitrary offsets. |
236 | /// |
237 | /// This property corresponds to the combination of the IntrWriteMem |
238 | /// and IntrArgMemOnly LLVM intrinsic flags. |
239 | FMRB_OnlyWritesArgumentPointees = |
240 | FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::Mod), |
241 | |
242 | /// The only memory references in this function (if it has any) are |
243 | /// non-volatile loads and stores from objects pointed to by its |
244 | /// pointer-typed arguments, with arbitrary offsets. |
245 | /// |
246 | /// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag. |
247 | FMRB_OnlyAccessesArgumentPointees = |
248 | FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::ModRef), |
249 | |
250 | /// The only memory references in this function (if it has any) are |
251 | /// reads of memory that is otherwise inaccessible via LLVM IR. |
252 | /// |
253 | /// This property corresponds to the LLVM IR inaccessiblememonly attribute. |
254 | FMRB_OnlyReadsInaccessibleMem = |
255 | FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::Ref), |
256 | |
257 | /// The only memory references in this function (if it has any) are |
258 | /// writes to memory that is otherwise inaccessible via LLVM IR. |
259 | /// |
260 | /// This property corresponds to the LLVM IR inaccessiblememonly attribute. |
261 | FMRB_OnlyWritesInaccessibleMem = |
262 | FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::Mod), |
263 | |
264 | /// The only memory references in this function (if it has any) are |
265 | /// reads or writes of memory that is otherwise inaccessible via LLVM IR. |
266 | /// |
267 | /// This property corresponds to the LLVM IR inaccessiblememonly attribute. |
268 | FMRB_OnlyAccessesInaccessibleMem = |
269 | FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::ModRef), |
270 | |
271 | /// The function may perform non-volatile loads from objects pointed |
272 | /// to by its pointer-typed arguments, with arbitrary offsets, and |
273 | /// it may also perform loads of memory that is otherwise |
274 | /// inaccessible via LLVM IR. |
275 | /// |
276 | /// This property corresponds to the LLVM IR |
277 | /// inaccessiblemem_or_argmemonly attribute. |
278 | FMRB_OnlyReadsInaccessibleOrArgMem = FMRL_InaccessibleMem | |
279 | FMRL_ArgumentPointees | |
280 | static_cast<int>(ModRefInfo::Ref), |
281 | |
282 | /// The function may perform non-volatile stores to objects pointed |
283 | /// to by its pointer-typed arguments, with arbitrary offsets, and |
284 | /// it may also perform stores of memory that is otherwise |
285 | /// inaccessible via LLVM IR. |
286 | /// |
287 | /// This property corresponds to the LLVM IR |
288 | /// inaccessiblemem_or_argmemonly attribute. |
289 | FMRB_OnlyWritesInaccessibleOrArgMem = FMRL_InaccessibleMem | |
290 | FMRL_ArgumentPointees | |
291 | static_cast<int>(ModRefInfo::Mod), |
292 | |
293 | /// The function may perform non-volatile loads and stores of objects |
294 | /// pointed to by its pointer-typed arguments, with arbitrary offsets, and |
295 | /// it may also perform loads and stores of memory that is otherwise |
296 | /// inaccessible via LLVM IR. |
297 | /// |
298 | /// This property corresponds to the LLVM IR |
299 | /// inaccessiblemem_or_argmemonly attribute. |
300 | FMRB_OnlyAccessesInaccessibleOrArgMem = FMRL_InaccessibleMem | |
301 | FMRL_ArgumentPointees | |
302 | static_cast<int>(ModRefInfo::ModRef), |
303 | |
304 | /// This function does not perform any non-local stores or volatile loads, |
305 | /// but may read from any memory location. |
306 | /// |
307 | /// This property corresponds to the GCC 'pure' attribute. |
308 | /// This property corresponds to the LLVM IR 'readonly' attribute. |
309 | /// This property corresponds to the IntrReadMem LLVM intrinsic flag. |
310 | FMRB_OnlyReadsMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Ref), |
311 | |
312 | /// This function does not read from memory anywhere, but may write to any |
313 | /// memory location. |
314 | /// |
315 | /// This property corresponds to the LLVM IR 'writeonly' attribute. |
316 | /// This property corresponds to the IntrWriteMem LLVM intrinsic flag. |
317 | FMRB_OnlyWritesMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Mod), |
318 | |
319 | /// This indicates that the function could not be classified into one of the |
320 | /// behaviors above. |
321 | FMRB_UnknownModRefBehavior = |
322 | FMRL_Anywhere | static_cast<int>(ModRefInfo::ModRef) |
323 | }; |
324 | |
325 | // Wrapper method strips bits significant only in FunctionModRefBehavior, |
326 | // to obtain a valid ModRefInfo. The benefit of using the wrapper is that if |
327 | // ModRefInfo enum changes, the wrapper can be updated to & with the new enum |
328 | // entry with all bits set to 1. |
329 | LLVM_NODISCARD[[clang::warn_unused_result]] inline ModRefInfo |
330 | createModRefInfo(const FunctionModRefBehavior FMRB) { |
331 | return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef)); |
332 | } |
333 | |
334 | /// This class stores info we want to provide to or retain within an alias |
335 | /// query. By default, the root query is stateless and starts with a freshly |
336 | /// constructed info object. Specific alias analyses can use this query info to |
337 | /// store per-query state that is important for recursive or nested queries to |
338 | /// avoid recomputing. To enable preserving this state across multiple queries |
339 | /// where safe (due to the IR not changing), use a `BatchAAResults` wrapper. |
340 | /// The information stored in an `AAQueryInfo` is currently limited to the |
341 | /// caches used by BasicAA, but can further be extended to fit other AA needs. |
342 | class AAQueryInfo { |
343 | public: |
344 | using LocPair = std::pair<MemoryLocation, MemoryLocation>; |
345 | using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>; |
346 | AliasCacheT AliasCache; |
347 | |
348 | using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>; |
349 | IsCapturedCacheT IsCapturedCache; |
350 | |
351 | AAQueryInfo() : AliasCache(), IsCapturedCache() {} |
352 | |
353 | AliasResult updateResult(const LocPair &Locs, AliasResult Result) { |
354 | auto It = AliasCache.find(Locs); |
355 | assert(It != AliasCache.end() && "Entry must have existed")((It != AliasCache.end() && "Entry must have existed" ) ? static_cast<void> (0) : __assert_fail ("It != AliasCache.end() && \"Entry must have existed\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/Analysis/AliasAnalysis.h" , 355, __PRETTY_FUNCTION__)); |
356 | return It->second = Result; |
357 | } |
358 | }; |
359 | |
360 | class BatchAAResults; |
361 | |
362 | class AAResults { |
363 | public: |
364 | // Make these results default constructible and movable. We have to spell |
365 | // these out because MSVC won't synthesize them. |
366 | AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {} |
367 | AAResults(AAResults &&Arg); |
368 | ~AAResults(); |
369 | |
370 | /// Register a specific AA result. |
371 | template <typename AAResultT> void addAAResult(AAResultT &AAResult) { |
372 | // FIXME: We should use a much lighter weight system than the usual |
373 | // polymorphic pattern because we don't own AAResult. It should |
374 | // ideally involve two pointers and no separate allocation. |
375 | AAs.emplace_back(new Model<AAResultT>(AAResult, *this)); |
376 | } |
377 | |
378 | /// Register a function analysis ID that the results aggregation depends on. |
379 | /// |
380 | /// This is used in the new pass manager to implement the invalidation logic |
381 | /// where we must invalidate the results aggregation if any of our component |
382 | /// analyses become invalid. |
383 | void addAADependencyID(AnalysisKey *ID) { AADeps.push_back(ID); } |
384 | |
385 | /// Handle invalidation events in the new pass manager. |
386 | /// |
387 | /// The aggregation is invalidated if any of the underlying analyses is |
388 | /// invalidated. |
389 | bool invalidate(Function &F, const PreservedAnalyses &PA, |
390 | FunctionAnalysisManager::Invalidator &Inv); |
391 | |
392 | //===--------------------------------------------------------------------===// |
393 | /// \name Alias Queries |
394 | /// @{ |
395 | |
396 | /// The main low level interface to the alias analysis implementation. |
397 | /// Returns an AliasResult indicating whether the two pointers are aliased to |
398 | /// each other. This is the interface that must be implemented by specific |
399 | /// alias analysis implementations. |
400 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); |
401 | |
402 | /// A convenience wrapper around the primary \c alias interface. |
403 | AliasResult alias(const Value *V1, LocationSize V1Size, const Value *V2, |
404 | LocationSize V2Size) { |
405 | return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); |
406 | } |
407 | |
408 | /// A convenience wrapper around the primary \c alias interface. |
409 | AliasResult alias(const Value *V1, const Value *V2) { |
410 | return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown()); |
411 | } |
412 | |
413 | /// A trivial helper function to check to see if the specified pointers are |
414 | /// no-alias. |
415 | bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
416 | return alias(LocA, LocB) == NoAlias; |
417 | } |
418 | |
419 | /// A convenience wrapper around the \c isNoAlias helper interface. |
420 | bool isNoAlias(const Value *V1, LocationSize V1Size, const Value *V2, |
421 | LocationSize V2Size) { |
422 | return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); |
423 | } |
424 | |
425 | /// A convenience wrapper around the \c isNoAlias helper interface. |
426 | bool isNoAlias(const Value *V1, const Value *V2) { |
427 | return isNoAlias(MemoryLocation(V1), MemoryLocation(V2)); |
428 | } |
429 | |
430 | /// A trivial helper function to check to see if the specified pointers are |
431 | /// must-alias. |
432 | bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
433 | return alias(LocA, LocB) == MustAlias; |
434 | } |
435 | |
436 | /// A convenience wrapper around the \c isMustAlias helper interface. |
437 | bool isMustAlias(const Value *V1, const Value *V2) { |
438 | return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) == |
439 | MustAlias; |
440 | } |
441 | |
442 | /// Checks whether the given location points to constant memory, or if |
443 | /// \p OrLocal is true whether it points to a local alloca. |
444 | bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false); |
445 | |
446 | /// A convenience wrapper around the primary \c pointsToConstantMemory |
447 | /// interface. |
448 | bool pointsToConstantMemory(const Value *P, bool OrLocal = false) { |
449 | return pointsToConstantMemory(MemoryLocation(P), OrLocal); |
450 | } |
451 | |
452 | /// @} |
453 | //===--------------------------------------------------------------------===// |
454 | /// \name Simple mod/ref information |
455 | /// @{ |
456 | |
457 | /// Get the ModRef info associated with a pointer argument of a call. The |
458 | /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note |
459 | /// that these bits do not necessarily account for the overall behavior of |
460 | /// the function, but rather only provide additional per-argument |
461 | /// information. This never sets ModRefInfo::Must. |
462 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx); |
463 | |
464 | /// Return the behavior of the given call site. |
465 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call); |
466 | |
467 | /// Return the behavior when calling the given function. |
468 | FunctionModRefBehavior getModRefBehavior(const Function *F); |
469 | |
470 | /// Checks if the specified call is known to never read or write memory. |
471 | /// |
472 | /// Note that if the call only reads from known-constant memory, it is also |
473 | /// legal to return true. Also, calls that unwind the stack are legal for |
474 | /// this predicate. |
475 | /// |
476 | /// Many optimizations (such as CSE and LICM) can be performed on such calls |
477 | /// without worrying about aliasing properties, and many calls have this |
478 | /// property (e.g. calls to 'sin' and 'cos'). |
479 | /// |
480 | /// This property corresponds to the GCC 'const' attribute. |
481 | bool doesNotAccessMemory(const CallBase *Call) { |
482 | return getModRefBehavior(Call) == FMRB_DoesNotAccessMemory; |
483 | } |
484 | |
485 | /// Checks if the specified function is known to never read or write memory. |
486 | /// |
487 | /// Note that if the function only reads from known-constant memory, it is |
488 | /// also legal to return true. Also, functions that unwind the stack are legal |
489 | /// for this predicate. |
490 | /// |
491 | /// Many optimizations (such as CSE and LICM) can be performed on such calls |
492 | /// to such functions without worrying about aliasing properties, and many |
493 | /// functions have this property (e.g. 'sin' and 'cos'). |
494 | /// |
495 | /// This property corresponds to the GCC 'const' attribute. |
496 | bool doesNotAccessMemory(const Function *F) { |
497 | return getModRefBehavior(F) == FMRB_DoesNotAccessMemory; |
498 | } |
499 | |
500 | /// Checks if the specified call is known to only read from non-volatile |
501 | /// memory (or not access memory at all). |
502 | /// |
503 | /// Calls that unwind the stack are legal for this predicate. |
504 | /// |
505 | /// This property allows many common optimizations to be performed in the |
506 | /// absence of interfering store instructions, such as CSE of strlen calls. |
507 | /// |
508 | /// This property corresponds to the GCC 'pure' attribute. |
509 | bool onlyReadsMemory(const CallBase *Call) { |
510 | return onlyReadsMemory(getModRefBehavior(Call)); |
511 | } |
512 | |
513 | /// Checks if the specified function is known to only read from non-volatile |
514 | /// memory (or not access memory at all). |
515 | /// |
516 | /// Functions that unwind the stack are legal for this predicate. |
517 | /// |
518 | /// This property allows many common optimizations to be performed in the |
519 | /// absence of interfering store instructions, such as CSE of strlen calls. |
520 | /// |
521 | /// This property corresponds to the GCC 'pure' attribute. |
522 | bool onlyReadsMemory(const Function *F) { |
523 | return onlyReadsMemory(getModRefBehavior(F)); |
524 | } |
525 | |
526 | /// Checks if functions with the specified behavior are known to only read |
527 | /// from non-volatile memory (or not access memory at all). |
528 | static bool onlyReadsMemory(FunctionModRefBehavior MRB) { |
529 | return !isModSet(createModRefInfo(MRB)); |
530 | } |
531 | |
532 | /// Checks if functions with the specified behavior are known to only write |
533 | /// memory (or not access memory at all). |
534 | static bool doesNotReadMemory(FunctionModRefBehavior MRB) { |
535 | return !isRefSet(createModRefInfo(MRB)); |
536 | } |
537 | |
538 | /// Checks if functions with the specified behavior are known to read and |
539 | /// write at most from objects pointed to by their pointer-typed arguments |
540 | /// (with arbitrary offsets). |
541 | static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) { |
542 | return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees); |
543 | } |
544 | |
545 | /// Checks if functions with the specified behavior are known to potentially |
546 | /// read or write from objects pointed to by their pointer-typed arguments |
547 | /// (with arbitrary offsets). |
548 | static bool doesAccessArgPointees(FunctionModRefBehavior MRB) { |
549 | return isModOrRefSet(createModRefInfo(MRB)) && |
550 | (MRB & FMRL_ArgumentPointees); |
551 | } |
552 | |
553 | /// Checks if functions with the specified behavior are known to read and |
554 | /// write at most from memory that is inaccessible from LLVM IR. |
555 | static bool onlyAccessesInaccessibleMem(FunctionModRefBehavior MRB) { |
556 | return !(MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem); |
557 | } |
558 | |
559 | /// Checks if functions with the specified behavior are known to potentially |
560 | /// read or write from memory that is inaccessible from LLVM IR. |
561 | static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) { |
562 | return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem); |
563 | } |
564 | |
565 | /// Checks if functions with the specified behavior are known to read and |
566 | /// write at most from memory that is inaccessible from LLVM IR or objects |
567 | /// pointed to by their pointer-typed arguments (with arbitrary offsets). |
568 | static bool onlyAccessesInaccessibleOrArgMem(FunctionModRefBehavior MRB) { |
569 | return !(MRB & FMRL_Anywhere & |
570 | ~(FMRL_InaccessibleMem | FMRL_ArgumentPointees)); |
571 | } |
572 | |
573 | /// getModRefInfo (for call sites) - Return information about whether |
574 | /// a particular call site modifies or reads the specified memory location. |
575 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); |
576 | |
577 | /// getModRefInfo (for call sites) - A convenience wrapper. |
578 | ModRefInfo getModRefInfo(const CallBase *Call, const Value *P, |
579 | LocationSize Size) { |
580 | return getModRefInfo(Call, MemoryLocation(P, Size)); |
581 | } |
582 | |
583 | /// getModRefInfo (for loads) - Return information about whether |
584 | /// a particular load modifies or reads the specified memory location. |
585 | ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc); |
586 | |
587 | /// getModRefInfo (for loads) - A convenience wrapper. |
588 | ModRefInfo getModRefInfo(const LoadInst *L, const Value *P, |
589 | LocationSize Size) { |
590 | return getModRefInfo(L, MemoryLocation(P, Size)); |
591 | } |
592 | |
593 | /// getModRefInfo (for stores) - Return information about whether |
594 | /// a particular store modifies or reads the specified memory location. |
595 | ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc); |
596 | |
597 | /// getModRefInfo (for stores) - A convenience wrapper. |
598 | ModRefInfo getModRefInfo(const StoreInst *S, const Value *P, |
599 | LocationSize Size) { |
600 | return getModRefInfo(S, MemoryLocation(P, Size)); |
601 | } |
602 | |
603 | /// getModRefInfo (for fences) - Return information about whether |
604 | /// a particular fence modifies or reads the specified memory location. |
605 | ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc); |
606 | |
607 | /// getModRefInfo (for fences) - A convenience wrapper. |
608 | ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, |
609 | LocationSize Size) { |
610 | return getModRefInfo(S, MemoryLocation(P, Size)); |
611 | } |
612 | |
613 | /// getModRefInfo (for cmpxchges) - Return information about whether |
614 | /// a particular cmpxchg modifies or reads the specified memory location. |
615 | ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, |
616 | const MemoryLocation &Loc); |
617 | |
618 | /// getModRefInfo (for cmpxchges) - A convenience wrapper. |
619 | ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P, |
620 | LocationSize Size) { |
621 | return getModRefInfo(CX, MemoryLocation(P, Size)); |
622 | } |
623 | |
624 | /// getModRefInfo (for atomicrmws) - Return information about whether |
625 | /// a particular atomicrmw modifies or reads the specified memory location. |
626 | ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc); |
627 | |
628 | /// getModRefInfo (for atomicrmws) - A convenience wrapper. |
629 | ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P, |
630 | LocationSize Size) { |
631 | return getModRefInfo(RMW, MemoryLocation(P, Size)); |
632 | } |
633 | |
634 | /// getModRefInfo (for va_args) - Return information about whether |
635 | /// a particular va_arg modifies or reads the specified memory location. |
636 | ModRefInfo getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc); |
637 | |
638 | /// getModRefInfo (for va_args) - A convenience wrapper. |
639 | ModRefInfo getModRefInfo(const VAArgInst *I, const Value *P, |
640 | LocationSize Size) { |
641 | return getModRefInfo(I, MemoryLocation(P, Size)); |
642 | } |
643 | |
644 | /// getModRefInfo (for catchpads) - Return information about whether |
645 | /// a particular catchpad modifies or reads the specified memory location. |
646 | ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc); |
647 | |
648 | /// getModRefInfo (for catchpads) - A convenience wrapper. |
649 | ModRefInfo getModRefInfo(const CatchPadInst *I, const Value *P, |
650 | LocationSize Size) { |
651 | return getModRefInfo(I, MemoryLocation(P, Size)); |
652 | } |
653 | |
654 | /// getModRefInfo (for catchrets) - Return information about whether |
655 | /// a particular catchret modifies or reads the specified memory location. |
656 | ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc); |
657 | |
658 | /// getModRefInfo (for catchrets) - A convenience wrapper. |
659 | ModRefInfo getModRefInfo(const CatchReturnInst *I, const Value *P, |
660 | LocationSize Size) { |
661 | return getModRefInfo(I, MemoryLocation(P, Size)); |
662 | } |
663 | |
664 | /// Check whether or not an instruction may read or write the optionally |
665 | /// specified memory location. |
666 | /// |
667 | /// |
668 | /// An instruction that doesn't read or write memory may be trivially LICM'd |
669 | /// for example. |
670 | /// |
671 | /// For function calls, this delegates to the alias-analysis specific |
672 | /// call-site mod-ref behavior queries. Otherwise it delegates to the specific |
673 | /// helpers above. |
674 | ModRefInfo getModRefInfo(const Instruction *I, |
675 | const Optional<MemoryLocation> &OptLoc) { |
676 | AAQueryInfo AAQIP; |
677 | return getModRefInfo(I, OptLoc, AAQIP); |
678 | } |
679 | |
680 | /// A convenience wrapper for constructing the memory location. |
681 | ModRefInfo getModRefInfo(const Instruction *I, const Value *P, |
682 | LocationSize Size) { |
683 | return getModRefInfo(I, MemoryLocation(P, Size)); |
684 | } |
685 | |
686 | /// Return information about whether a call and an instruction may refer to |
687 | /// the same memory locations. |
688 | ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call); |
689 | |
690 | /// Return information about whether two call sites may refer to the same set |
691 | /// of memory locations. See the AA documentation for details: |
692 | /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo |
693 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2); |
694 | |
695 | /// Return information about whether a particular call site modifies |
696 | /// or reads the specified memory location \p MemLoc before instruction \p I |
697 | /// in a BasicBlock. |
698 | /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being |
699 | /// set. |
700 | ModRefInfo callCapturesBefore(const Instruction *I, |
701 | const MemoryLocation &MemLoc, DominatorTree *DT); |
702 | |
703 | /// A convenience wrapper to synthesize a memory location. |
704 | ModRefInfo callCapturesBefore(const Instruction *I, const Value *P, |
705 | LocationSize Size, DominatorTree *DT) { |
706 | return callCapturesBefore(I, MemoryLocation(P, Size), DT); |
707 | } |
708 | |
709 | /// @} |
710 | //===--------------------------------------------------------------------===// |
711 | /// \name Higher level methods for querying mod/ref information. |
712 | /// @{ |
713 | |
714 | /// Check if it is possible for execution of the specified basic block to |
715 | /// modify the location Loc. |
716 | bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc); |
717 | |
718 | /// A convenience wrapper synthesizing a memory location. |
719 | bool canBasicBlockModify(const BasicBlock &BB, const Value *P, |
720 | LocationSize Size) { |
721 | return canBasicBlockModify(BB, MemoryLocation(P, Size)); |
722 | } |
723 | |
724 | /// Check if it is possible for the execution of the specified instructions |
725 | /// to mod/ref (according to the mode) the location Loc. |
726 | /// |
727 | /// The instructions to consider are all of the instructions in the range of |
728 | /// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block. |
729 | bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, |
730 | const MemoryLocation &Loc, |
731 | const ModRefInfo Mode); |
732 | |
733 | /// A convenience wrapper synthesizing a memory location. |
734 | bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, |
735 | const Value *Ptr, LocationSize Size, |
736 | const ModRefInfo Mode) { |
737 | return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode); |
738 | } |
739 | |
740 | private: |
741 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
742 | AAQueryInfo &AAQI); |
743 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
744 | bool OrLocal = false); |
745 | ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2, |
746 | AAQueryInfo &AAQIP); |
747 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
748 | AAQueryInfo &AAQI); |
749 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
750 | AAQueryInfo &AAQI); |
751 | ModRefInfo getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc, |
752 | AAQueryInfo &AAQI); |
753 | ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc, |
754 | AAQueryInfo &AAQI); |
755 | ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc, |
756 | AAQueryInfo &AAQI); |
757 | ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc, |
758 | AAQueryInfo &AAQI); |
759 | ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, |
760 | const MemoryLocation &Loc, AAQueryInfo &AAQI); |
761 | ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc, |
762 | AAQueryInfo &AAQI); |
763 | ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc, |
764 | AAQueryInfo &AAQI); |
765 | ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc, |
766 | AAQueryInfo &AAQI); |
767 | ModRefInfo getModRefInfo(const Instruction *I, |
768 | const Optional<MemoryLocation> &OptLoc, |
769 | AAQueryInfo &AAQIP) { |
      // Mod/ref query for an arbitrary instruction and an optional location.
      // When no location is given and \p I is a CallBase, the answer is built
      // from the call's overall FunctionModRefBehavior and we return early.
770 | if (OptLoc == None) { |
771 | if (const auto *Call = dyn_cast<CallBase>(I)) { |
772 | return createModRefInfo(getModRefBehavior(Call)); |
773 | } |
774 | } |
775 | |
      // A missing location that did not take the early return above is replaced
      // by a default-constructed MemoryLocation.
776 | const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation()); |
777 | |
      // Dispatch on the opcode to the overload for the matching instruction
      // class; any opcode not listed below reports NoModRef.
778 | switch (I->getOpcode()) { |
779 | case Instruction::VAArg: |
780 | return getModRefInfo((const VAArgInst *)I, Loc, AAQIP); |
781 | case Instruction::Load: |
782 | return getModRefInfo((const LoadInst *)I, Loc, AAQIP); |
783 | case Instruction::Store: |
784 | return getModRefInfo((const StoreInst *)I, Loc, AAQIP); |
785 | case Instruction::Fence: |
786 | return getModRefInfo((const FenceInst *)I, Loc, AAQIP); |
787 | case Instruction::AtomicCmpXchg: |
788 | return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP); |
789 | case Instruction::AtomicRMW: |
790 | return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP); |
791 | case Instruction::Call: |
792 | return getModRefInfo((const CallInst *)I, Loc, AAQIP); |
793 | case Instruction::Invoke: |
794 | return getModRefInfo((const InvokeInst *)I, Loc, AAQIP); |
795 | case Instruction::CatchPad: |
796 | return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP); |
797 | case Instruction::CatchRet: |
798 | return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP); |
799 | default: |
800 | return ModRefInfo::NoModRef; |
801 | } |
802 | } |
803 | |
804 | class Concept; |
805 | |
806 | template <typename T> class Model; |
807 | |
808 | template <typename T> friend class AAResultBase; |
809 | |
810 | const TargetLibraryInfo &TLI; |
811 | |
812 | std::vector<std::unique_ptr<Concept>> AAs; |
813 | |
814 | std::vector<AnalysisKey *> AADeps; |
815 | |
816 | friend class BatchAAResults; |
817 | }; |
818 | |
819 | /// This class is a wrapper over an AAResults, and it is intended to be used |
820 | /// only when there are no IR changes in between queries. BatchAAResults is |
821 | /// reusing the same `AAQueryInfo` to preserve the state across queries, |
822 | /// essentially making AA work in "batch mode". The internal state cannot be |
823 | /// cleared, so to go "out-of-batch-mode", the user must either use AAResults, |
824 | /// or create a new BatchAAResults. |
825 | class BatchAAResults { |
826 | AAResults &AA; |
827 | AAQueryInfo AAQI; |
828 | |
829 | public: |
830 | BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {} |
831 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
832 | return AA.alias(LocA, LocB, AAQI); |
833 | } |
834 | bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) { |
835 | return AA.pointsToConstantMemory(Loc, AAQI, OrLocal); |
836 | } |
837 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) { |
838 | return AA.getModRefInfo(Call, Loc, AAQI); |
839 | } |
840 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) { |
841 | return AA.getModRefInfo(Call1, Call2, AAQI); |
842 | } |
843 | ModRefInfo getModRefInfo(const Instruction *I, |
844 | const Optional<MemoryLocation> &OptLoc) { |
845 | return AA.getModRefInfo(I, OptLoc, AAQI); |
846 | } |
847 | ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2) { |
848 | return AA.getModRefInfo(I, Call2, AAQI); |
849 | } |
      // The two queries below forward to AA without passing the shared AAQI.
850 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { |
851 | return AA.getArgModRefInfo(Call, ArgIdx); |
852 | } |
853 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { |
854 | return AA.getModRefBehavior(Call); |
855 | } |
856 | bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { |
857 | return alias(LocA, LocB) == MustAlias; |
858 | } |
      // Value-based convenience form: wraps each pointer in a MemoryLocation of
      // precise size 1 before running the alias query.
859 | bool isMustAlias(const Value *V1, const Value *V2) { |
860 | return alias(MemoryLocation(V1, LocationSize::precise(1)), |
861 | MemoryLocation(V2, LocationSize::precise(1))) == MustAlias; |
862 | } |
863 | }; |
864 | |
865 | /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis |
866 | /// pointer or reference. |
867 | using AliasAnalysis = AAResults; |
868 | |
869 | /// A private abstract base class describing the concept of an individual alias |
870 | /// analysis implementation. |
871 | /// |
872 | /// This interface is implemented by any \c Model instantiation. It is also the |
873 | /// interface which a type used to instantiate the model must provide. |
874 | /// |
875 | /// All of these methods model methods by the same name in the \c |
876 | /// AAResults class. Only differences and specifics to how the |
877 | /// implementations are called are documented here. |
878 | class AAResults::Concept { |
879 | public: |
      // Pure virtual destructor; no inline body is provided at this point.
880 | virtual ~Concept() = 0; |
881 | |
882 | /// An update API used internally by the AAResults to provide |
883 | /// a handle back to the top level aggregation. |
884 | virtual void setAAResults(AAResults *NewAAR) = 0; |
885 | |
886 | //===--------------------------------------------------------------------===// |
887 | /// \name Alias Queries |
888 | /// @{ |
889 | |
890 | /// The main low level interface to the alias analysis implementation. |
891 | /// Returns an AliasResult indicating whether the two pointers are aliased to |
892 | /// each other. This is the interface that must be implemented by specific |
893 | /// alias analysis implementations. |
894 | virtual AliasResult alias(const MemoryLocation &LocA, |
895 | const MemoryLocation &LocB, AAQueryInfo &AAQI) = 0; |
896 | |
897 | /// Checks whether the given location points to constant memory, or if |
898 | /// \p OrLocal is true whether it points to a local alloca. |
899 | virtual bool pointsToConstantMemory(const MemoryLocation &Loc, |
900 | AAQueryInfo &AAQI, bool OrLocal) = 0; |
901 | |
902 | /// @} |
903 | //===--------------------------------------------------------------------===// |
904 | /// \name Simple mod/ref information |
905 | /// @{ |
906 | |
907 | /// Get the ModRef info associated with a pointer argument of a callsite. The |
908 | /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note |
909 | /// that these bits do not necessarily account for the overall behavior of |
910 | /// the function, but rather only provide additional per-argument |
911 | /// information. |
912 | virtual ModRefInfo getArgModRefInfo(const CallBase *Call, |
913 | unsigned ArgIdx) = 0; |
914 | |
915 | /// Return the behavior of the given call site. |
916 | virtual FunctionModRefBehavior getModRefBehavior(const CallBase *Call) = 0; |
917 | |
918 | /// Return the behavior when calling the given function. |
919 | virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0; |
920 | |
921 | /// getModRefInfo (for call sites) - Return information about whether |
922 | /// a particular call site modifies or reads the specified memory location. |
923 | virtual ModRefInfo getModRefInfo(const CallBase *Call, |
924 | const MemoryLocation &Loc, |
925 | AAQueryInfo &AAQI) = 0; |
926 | |
927 | /// Return information about whether two call sites may refer to the same set |
928 | /// of memory locations. See the AA documentation for details: |
929 | /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo |
930 | virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
931 | AAQueryInfo &AAQI) = 0; |
932 | |
933 | /// @} |
934 | }; |
935 | |
936 | /// A private class template which derives from \c Concept and wraps some other |
937 | /// type. |
938 | /// |
939 | /// This models the concept by directly forwarding each interface point to the |
940 | /// wrapped type which must implement a compatible interface. This provides |
941 | /// a type erased binding. |
942 | template <typename AAResultT> class AAResults::Model final : public Concept { |
      // Held by reference: the Model does not own the wrapped result.
943 | AAResultT &Result; |
944 | |
945 | public: |
      // Construction also hands \p AAR to the wrapped result via setAAResults.
946 | explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) { |
947 | Result.setAAResults(&AAR); |
948 | } |
949 | ~Model() override = default; |
950 | |
951 | void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); } |
952 | |
      // Every override below forwards its arguments unchanged to Result.
953 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
954 | AAQueryInfo &AAQI) override { |
955 | return Result.alias(LocA, LocB, AAQI); |
956 | } |
957 | |
958 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
959 | bool OrLocal) override { |
960 | return Result.pointsToConstantMemory(Loc, AAQI, OrLocal); |
961 | } |
962 | |
963 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override { |
964 | return Result.getArgModRefInfo(Call, ArgIdx); |
965 | } |
966 | |
967 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) override { |
968 | return Result.getModRefBehavior(Call); |
969 | } |
970 | |
971 | FunctionModRefBehavior getModRefBehavior(const Function *F) override { |
972 | return Result.getModRefBehavior(F); |
973 | } |
974 | |
975 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
976 | AAQueryInfo &AAQI) override { |
977 | return Result.getModRefInfo(Call, Loc, AAQI); |
978 | } |
979 | |
980 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
981 | AAQueryInfo &AAQI) override { |
982 | return Result.getModRefInfo(Call1, Call2, AAQI); |
983 | } |
984 | }; |
985 | |
986 | /// A CRTP-driven "mixin" base class to help implement the function alias |
987 | /// analysis results concept. |
988 | /// |
989 | /// Because of the nature of many alias analysis implementations, they often |
990 | /// only implement a subset of the interface. This base class will attempt to |
991 | /// implement the remaining portions of the interface in terms of simpler forms |
992 | /// of the interface where possible, and otherwise provide conservatively |
993 | /// correct fallback implementations. |
994 | /// |
995 | /// Implementors of an alias analysis should derive from this CRTP, and then |
996 | /// override specific methods that they wish to customize. There is no need to |
997 | /// use virtual anywhere, the CRTP base class does static dispatch to the |
998 | /// derived type passed into it. |
999 | template <typename DerivedT> class AAResultBase { |
1000 | // Expose some parts of the interface only to the AAResults::Model |
1001 | // for wrapping. Specifically, this allows the model to call our |
1002 | // setAAResults method without exposing it as a fully public API. |
1003 | friend class AAResults::Model<DerivedT>; |
1004 | |
1005 | /// A pointer to the AAResults object that this AAResult is |
1006 | /// aggregated within. May be null if not aggregated. |
1007 | AAResults *AAR = nullptr; |
1008 | |
1009 | /// Helper to dispatch calls back through the derived type. |
1010 | DerivedT &derived() { return static_cast<DerivedT &>(*this); } |
1011 | |
1012 | /// A setter for the AAResults pointer, which is used to satisfy the |
1013 | /// AAResults::Model contract. |
1014 | void setAAResults(AAResults *NewAAR) { AAR = NewAAR; } |
1015 | |
1016 | protected: |
1017 | /// This proxy class models a common pattern where we delegate to either the |
1018 | /// top-level \c AAResults aggregation if one is registered, or to the |
1019 | /// current result if none are registered. |
1020 | class AAResultsProxy { |
1021 | AAResults *AAR; |
1022 | DerivedT &CurrentResult; |
1023 | |
1024 | public: |
1025 | AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult) |
1026 | : AAR(AAR), CurrentResult(CurrentResult) {} |
1027 | |
       // Each query below uses AAR when it is non-null and otherwise falls back
       // to the same-named method on CurrentResult.
1028 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
1029 | AAQueryInfo &AAQI) { |
1030 | return AAR ? AAR->alias(LocA, LocB, AAQI) |
1031 | : CurrentResult.alias(LocA, LocB, AAQI); |
1032 | } |
1033 | |
1034 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
1035 | bool OrLocal) { |
1036 | return AAR ? AAR->pointsToConstantMemory(Loc, AAQI, OrLocal) |
1037 | : CurrentResult.pointsToConstantMemory(Loc, AAQI, OrLocal); |
1038 | } |
1039 | |
1040 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { |
1041 | return AAR ? AAR->getArgModRefInfo(Call, ArgIdx) |
1042 | : CurrentResult.getArgModRefInfo(Call, ArgIdx); |
1043 | } |
1044 | |
1045 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { |
1046 | return AAR ? AAR->getModRefBehavior(Call) |
1047 | : CurrentResult.getModRefBehavior(Call); |
1048 | } |
1049 | |
1050 | FunctionModRefBehavior getModRefBehavior(const Function *F) { |
1051 | return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F); |
1052 | } |
1053 | |
1054 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
1055 | AAQueryInfo &AAQI) { |
1056 | return AAR ? AAR->getModRefInfo(Call, Loc, AAQI) |
1057 | : CurrentResult.getModRefInfo(Call, Loc, AAQI); |
1058 | } |
1059 | |
1060 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
1061 | AAQueryInfo &AAQI) { |
1062 | return AAR ? AAR->getModRefInfo(Call1, Call2, AAQI) |
1063 | : CurrentResult.getModRefInfo(Call1, Call2, AAQI); |
1064 | } |
1065 | }; |
1066 | |
1067 | explicit AAResultBase() = default; |
1068 | |
1069 | // Provide all the copy and move constructors so that derived types aren't |
1070 | // constrained. |
       // Note: neither constructor copies Arg's AAR pointer; the new object keeps
       // the default AAR == nullptr.
1071 | AAResultBase(const AAResultBase &Arg) {} |
1072 | AAResultBase(AAResultBase &&Arg) {} |
1073 | |
1074 | /// Get a proxy for the best AA result set to query at this time. |
1075 | /// |
1076 | /// When this result is part of a larger aggregation, this will proxy to that |
1077 | /// aggregation. When this result is used in isolation, it will just delegate |
1078 | /// back to the derived class's implementation. |
1079 | /// |
1080 | /// Note that callers of this need to take considerable care to not cause |
1081 | /// performance problems when they use this routine, in the case of a large |
1082 | /// number of alias analyses being aggregated, it can be expensive to walk |
1083 | /// back across the chain. |
1084 | AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); } |
1085 | |
1086 | public: |
       // Fallback answers for queries: MayAlias, not constant memory, ModRef,
       // and FMRB_UnknownModRefBehavior. Per the class comment, a derived
       // analysis overrides only the queries it wants to customize.
1087 | AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, |
1088 | AAQueryInfo &AAQI) { |
1089 | return MayAlias; |
1090 | } |
1091 | |
1092 | bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, |
1093 | bool OrLocal) { |
1094 | return false; |
1095 | } |
1096 | |
1097 | ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { |
1098 | return ModRefInfo::ModRef; |
1099 | } |
1100 | |
1101 | FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { |
1102 | return FMRB_UnknownModRefBehavior; |
1103 | } |
1104 | |
1105 | FunctionModRefBehavior getModRefBehavior(const Function *F) { |
1106 | return FMRB_UnknownModRefBehavior; |
1107 | } |
1108 | |
1109 | ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, |
1110 | AAQueryInfo &AAQI) { |
1111 | return ModRefInfo::ModRef; |
1112 | } |
1113 | |
1114 | ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, |
1115 | AAQueryInfo &AAQI) { |
1116 | return ModRefInfo::ModRef; |
1117 | } |
1118 | }; |
1119 | |
1120 | /// Return true if this pointer is returned by a noalias function. |
1121 | bool isNoAliasCall(const Value *V); |
1122 | |
1123 | /// Return true if this is an argument with the noalias attribute. |
1124 | bool isNoAliasArgument(const Value *V); |
1125 | |
1126 | /// Return true if this pointer refers to a distinct and identifiable object. |
1127 | /// This returns true for: |
1128 | /// Global Variables and Functions (but not Global Aliases) |
1129 | /// Allocas |
1130 | /// ByVal and NoAlias Arguments |
1131 | /// NoAlias returns (e.g. calls to malloc) |
1132 | /// |
1133 | bool isIdentifiedObject(const Value *V); |
1134 | |
1135 | /// Return true if V is unambiguously identified at the function-level. |
1136 | /// Different IdentifiedFunctionLocals can't alias. |
1137 | /// Further, an IdentifiedFunctionLocal can not alias with any function |
1138 | /// arguments other than itself, which is not necessarily true for |
1139 | /// IdentifiedObjects. |
1140 | bool isIdentifiedFunctionLocal(const Value *V); |
1141 | |
1142 | /// A manager for alias analyses. |
1143 | /// |
1144 | /// This class can have analyses registered with it and when run, it will run |
1145 | /// all of them and aggregate their results into single AA results interface |
1146 | /// that dispatches across all of the alias analysis results available. |
1147 | /// |
1148 | /// Note that the order in which analyses are registered is very significant. |
1149 | /// That is the order in which the results will be aggregated and queried. |
1150 | /// |
1151 | /// This manager effectively wraps the AnalysisManager for registering alias |
1152 | /// analyses. When you register your alias analysis with this manager, it will |
1153 | /// ensure the analysis itself is registered with its AnalysisManager. |
1154 | /// |
1155 | /// The result of this analysis is only invalidated if one of the particular |
1156 | /// aggregated AA results end up being invalidated. This removes the need to |
1157 | /// explicitly preserve the results of `AAManager`. Note that analyses should no |
1158 | /// longer be registered once the `AAManager` is run. |
1159 | class AAManager : public AnalysisInfoMixin<AAManager> { |
1160 | public: |
1161 | using Result = AAResults; |
1162 | |
1163 | /// Register a specific AA result produced by a function-level analysis. |
1164 | template <typename AnalysisT> void registerFunctionAnalysis() { |
1165 | ResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>); |
1166 | } |
1167 | |
1168 | /// Register a specific AA result produced by a module-level analysis. |
1169 | template <typename AnalysisT> void registerModuleAnalysis() { |
1170 | ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>); |
1171 | } |
1172 | |
       /// Builds an AAResults from the TargetLibraryAnalysis result for \p F and
       /// invokes every registered getter on it, in registration order.
1173 | Result run(Function &F, FunctionAnalysisManager &AM) { |
1174 | Result R(AM.getResult<TargetLibraryAnalysis>(F)); |
1175 | for (auto &Getter : ResultGetters) |
1176 | (*Getter)(F, AM, R); |
1177 | return R; |
1178 | } |
1179 | |
1180 | private: |
1181 | friend AnalysisInfoMixin<AAManager>; |
1182 | |
1183 | static AnalysisKey Key; |
1184 | |
       // Callbacks that each add one analysis result to the AAResults being built.
1185 | SmallVector<void (*)(Function &F, FunctionAnalysisManager &AM, |
1186 | AAResults &AAResults), |
1187 | 4> ResultGetters; |
1188 | |
       /// Fetches AnalysisT's result for \p F through \p AM, adds it to
       /// \p AAResults, and records AnalysisT::ID() as a dependency.
1189 | template <typename AnalysisT> |
1190 | static void getFunctionAAResultImpl(Function &F, |
1191 | FunctionAnalysisManager &AM, |
1192 | AAResults &AAResults) { |
1193 | AAResults.addAAResult(AM.template getResult<AnalysisT>(F)); |
1194 | AAResults.addAADependencyID(AnalysisT::ID()); |
1195 | } |
1196 | |
       /// Adds AnalysisT's module-level result only if the module analysis manager
       /// proxy already has it cached; when nothing is cached, nothing is added.
1197 | template <typename AnalysisT> |
1198 | static void getModuleAAResultImpl(Function &F, FunctionAnalysisManager &AM, |
1199 | AAResults &AAResults) { |
1200 | auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); |
1201 | if (auto *R = |
1202 | MAMProxy.template getCachedResult<AnalysisT>(*F.getParent())) { |
1203 | AAResults.addAAResult(*R); |
1204 | MAMProxy |
1205 | .template registerOuterAnalysisInvalidation<AnalysisT, AAManager>(); |
1206 | } |
1207 | } |
1208 | }; |
1209 | |
1210 | /// A wrapper pass to provide the legacy pass manager access to a suitably |
1211 | /// prepared AAResults object. |
1212 | class AAResultsWrapperPass : public FunctionPass { |
1213 | std::unique_ptr<AAResults> AAR; |
1214 | |
1215 | public: |
1216 | static char ID; |
1217 | |
1218 | AAResultsWrapperPass(); |
1219 | |
       // Both accessors dereference AAR unconditionally. Presumably AAR is
       // populated by runOnFunction before they are called (TODO confirm against
       // the out-of-line definition).
1220 | AAResults &getAAResults() { return *AAR; } |
1221 | const AAResults &getAAResults() const { return *AAR; } |
1222 | |
1223 | bool runOnFunction(Function &F) override; |
1224 | |
1225 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
1226 | }; |
1227 | |
1228 | /// A wrapper pass for external alias analyses. This just squirrels away the |
1229 | /// callback used to run any analyses and register their results. |
1230 | struct ExternalAAWrapperPass : ImmutablePass { |
       // Signature of the stored callback: it receives a pass, a function, and
       // the AAResults object.
1231 | using CallbackT = std::function<void(Pass &, Function &, AAResults &)>; |
1232 | |
1233 | CallbackT CB; |
1234 | |
1235 | static char ID; |
1236 | |
1237 | ExternalAAWrapperPass(); |
1238 | |
1239 | explicit ExternalAAWrapperPass(CallbackT CB); |
1240 | |
       // Marks every analysis as preserved by this pass.
1241 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1242 | AU.setPreservesAll(); |
1243 | } |
1244 | }; |
1245 | |
1246 | FunctionPass *createAAResultsWrapperPass(); |
1247 | |
1248 | /// A wrapper pass around a callback which can be used to populate the |
1249 | /// AAResults in the AAResultsWrapperPass from an external AA. |
1250 | /// |
1251 | /// The callback provided here will be used each time we prepare an AAResults |
1252 | /// object, and will receive a reference to the function wrapper pass, the |
1253 | /// function, and the AAResults object to populate. This should be used when |
1254 | /// setting up a custom pass pipeline to inject a hook into the AA results. |
1255 | ImmutablePass *createExternalAAWrapperPass( |
1256 | std::function<void(Pass &, Function &, AAResults &)> Callback); |
1257 | |
1258 | /// A helper for the legacy pass manager to create a \c AAResults |
1259 | /// object populated to the best of our ability for a particular function when |
1260 | /// inside of a \c ModulePass or a \c CallGraphSCCPass. |
1261 | /// |
1262 | /// If a \c ModulePass or a \c CallGraphSCCPass calls \p |
1263 | /// createLegacyPMAAResults, it also needs to call \p getAAResultsAnalysisUsage |
1264 | /// in \p getAnalysisUsage. |
1265 | AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); |
1266 | |
1267 | /// A helper for the legacy pass manager to populate \p AU to add uses to make |
1268 | /// sure the analyses required by \p createLegacyPMAAResults are available. |
1269 | void getAAResultsAnalysisUsage(AnalysisUsage &AU); |
1270 | |
1271 | } // end namespace llvm |
1272 | |
1273 | #endif // LLVM_ANALYSIS_ALIASANALYSIS_H |
1 | //===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the declaration of the Type class. For more "Type" |
10 | // stuff, look in DerivedTypes.h. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_IR_TYPE_H |
15 | #define LLVM_IR_TYPE_H |
16 | |
17 | #include "llvm/ADT/APFloat.h" |
18 | #include "llvm/ADT/ArrayRef.h" |
19 | #include "llvm/ADT/SmallPtrSet.h" |
20 | #include "llvm/Support/CBindingWrapping.h" |
21 | #include "llvm/Support/Casting.h" |
22 | #include "llvm/Support/Compiler.h" |
23 | #include "llvm/Support/ErrorHandling.h" |
24 | #include "llvm/Support/TypeSize.h" |
25 | #include <cassert> |
26 | #include <cstdint> |
27 | #include <iterator> |
28 | |
29 | namespace llvm { |
30 | |
31 | template<class GraphType> struct GraphTraits; |
32 | class IntegerType; |
33 | class LLVMContext; |
34 | class PointerType; |
35 | class raw_ostream; |
36 | class StringRef; |
37 | |
38 | /// The instances of the Type class are immutable: once they are created, |
39 | /// they are never changed. Also note that only one instance of a particular |
40 | /// type is ever created. Thus seeing if two types are equal is a matter of |
41 | /// doing a trivial pointer comparison. To enforce that no two equal instances |
42 | /// are created, Type instances can only be created via static factory methods |
43 | /// in class Type and in derived classes. Once allocated, Types are never |
44 | /// free'd. |
45 | /// |
46 | class Type { |
47 | public: |
48 | //===--------------------------------------------------------------------===// |
49 | /// Definitions of all of the base types for the Type system. Based on this |
50 | /// value, you can cast to a class defined in DerivedTypes.h. |
51 | /// Note: If you add an element to this, you need to add an element to the |
52 | /// Type::getPrimitiveType function, or else things will break! |
53 | /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding. |
54 | /// |
55 | enum TypeID { |
56 | // PrimitiveTypes |
57 | HalfTyID = 0, ///< 16-bit floating point type |
58 | BFloatTyID, ///< 16-bit floating point type (7-bit significand) |
59 | FloatTyID, ///< 32-bit floating point type |
60 | DoubleTyID, ///< 64-bit floating point type |
61 | X86_FP80TyID, ///< 80-bit floating point type (X87) |
62 | FP128TyID, ///< 128-bit floating point type (112-bit significand) |
63 | PPC_FP128TyID, ///< 128-bit floating point type (two 64-bits, PowerPC) |
64 | VoidTyID, ///< type with no size |
65 | LabelTyID, ///< Labels |
66 | MetadataTyID, ///< Metadata |
67 | X86_MMXTyID, ///< MMX vectors (64 bits, X86 specific) |
68 | TokenTyID, ///< Tokens |
69 | |
70 | // Derived types... see DerivedTypes.h file. |
71 | IntegerTyID, ///< Arbitrary bit width integers |
72 | FunctionTyID, ///< Functions |
73 | PointerTyID, ///< Pointers |
74 | StructTyID, ///< Structures |
75 | ArrayTyID, ///< Arrays |
76 | FixedVectorTyID, ///< Fixed width SIMD vector type |
77 | ScalableVectorTyID ///< Scalable SIMD vector type |
78 | }; |
79 | |
80 | private: |
81 | /// This refers to the LLVMContext in which this type was uniqued. |
82 | LLVMContext &Context; |
83 | |
84 | TypeID ID : 8; // The current base type of this type. |
85 | unsigned SubclassData : 24; // Space for subclasses to store data. |
86 | // Note that this should be synchronized with |
87 | // MAX_INT_BITS value in IntegerType class. |
88 | |
89 | protected: |
90 | friend class LLVMContextImpl; |
91 | |
92 | explicit Type(LLVMContext &C, TypeID tid) |
93 | : Context(C), ID(tid), SubclassData(0) {} |
94 | ~Type() = default; |
95 | |
96 | unsigned getSubclassData() const { return SubclassData; } |
97 | |
98 | void setSubclassData(unsigned val) { |
99 | SubclassData = val; |
100 | // Ensure we don't have any accidental truncation. |
101 | assert(getSubclassData() == val && "Subclass data too large for field")((getSubclassData() == val && "Subclass data too large for field" ) ? static_cast<void> (0) : __assert_fail ("getSubclassData() == val && \"Subclass data too large for field\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/Type.h" , 101, __PRETTY_FUNCTION__)); |
102 | } |
103 | |
104 | /// Keeps track of how many Type*'s there are in the ContainedTys list. |
105 | unsigned NumContainedTys = 0; |
106 | |
107 | /// A pointer to the array of Types contained by this Type. For example, this |
108 | /// includes the arguments of a function type, the elements of a structure, |
109 | /// the pointee of a pointer, the element type of an array, etc. This pointer |
110 | /// may be 0 for types that don't contain other types (Integer, Double, |
111 | /// Float). |
112 | Type * const *ContainedTys = nullptr; |
113 | |
114 | public: |
115 | /// Print the current type. |
116 | /// Omit the type details if \p NoDetails == true. |
117 | /// E.g., let %st = type { i32, i16 } |
118 | /// When \p NoDetails is true, we only print %st. |
119 | /// Put differently, \p NoDetails prints the type as if |
120 | /// inlined with the operands when printing an instruction. |
121 | void print(raw_ostream &O, bool IsForDebug = false, |
122 | bool NoDetails = false) const; |
123 | |
124 | void dump() const; |
125 | |
126 | /// Return the LLVMContext in which this type was uniqued. |
127 | LLVMContext &getContext() const { return Context; } |
128 | |
129 | //===--------------------------------------------------------------------===// |
130 | // Accessors for working with types. |
131 | // |
132 | |
133 | /// Return the type id for the type. This will return one of the TypeID enum |
134 | /// elements defined above. |
135 | TypeID getTypeID() const { return ID; } |
136 | |
137 | /// Return true if this is 'void'. |
138 | bool isVoidTy() const { return getTypeID() == VoidTyID; } |
139 | |
140 | /// Return true if this is 'half', a 16-bit IEEE fp type. |
141 | bool isHalfTy() const { return getTypeID() == HalfTyID; } |
142 | |
143 | /// Return true if this is 'bfloat', a 16-bit bfloat type. |
144 | bool isBFloatTy() const { return getTypeID() == BFloatTyID; } |
145 | |
146 | /// Return true if this is 'float', a 32-bit IEEE fp type. |
147 | bool isFloatTy() const { return getTypeID() == FloatTyID; } |
148 | |
149 | /// Return true if this is 'double', a 64-bit IEEE fp type. |
150 | bool isDoubleTy() const { return getTypeID() == DoubleTyID; } |
151 | |
152 | /// Return true if this is x86 long double. |
153 | bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; } |
154 | |
155 | /// Return true if this is 'fp128'. |
156 | bool isFP128Ty() const { return getTypeID() == FP128TyID; } |
157 | |
158 | /// Return true if this is powerpc long double. |
159 | bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; } |
160 | |
161 | /// Return true if this is one of the floating-point types. |
162 | bool isFloatingPointTy() const { |
163 | return getTypeID() == HalfTyID || getTypeID() == BFloatTyID || |
164 | getTypeID() == FloatTyID || getTypeID() == DoubleTyID || |
165 | getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID || |
166 | getTypeID() == PPC_FP128TyID; |
167 | } |
168 | |
169 | const fltSemantics &getFltSemantics() const { |
170 | switch (getTypeID()) { |
171 | case HalfTyID: return APFloat::IEEEhalf(); |
172 | case BFloatTyID: return APFloat::BFloat(); |
173 | case FloatTyID: return APFloat::IEEEsingle(); |
174 | case DoubleTyID: return APFloat::IEEEdouble(); |
175 | case X86_FP80TyID: return APFloat::x87DoubleExtended(); |
176 | case FP128TyID: return APFloat::IEEEquad(); |
177 | case PPC_FP128TyID: return APFloat::PPCDoubleDouble(); |
178 | default: llvm_unreachable("Invalid floating type")::llvm::llvm_unreachable_internal("Invalid floating type", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/Type.h" , 178); |
179 | } |
180 | } |
181 | |
182 | /// Return true if this is X86 MMX. |
183 | bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; } |
184 | |
185 | /// Return true if this is a FP type or a vector of FP. |
186 | bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } |
187 | |
188 | /// Return true if this is 'label'. |
189 | bool isLabelTy() const { return getTypeID() == LabelTyID; } |
190 | |
191 | /// Return true if this is 'metadata'. |
192 | bool isMetadataTy() const { return getTypeID() == MetadataTyID; } |
193 | |
194 | /// Return true if this is 'token'. |
195 | bool isTokenTy() const { return getTypeID() == TokenTyID; } |
196 | |
197 | /// True if this is an instance of IntegerType. |
198 | bool isIntegerTy() const { return getTypeID() == IntegerTyID; } |
199 | |
200 | /// Return true if this is an IntegerType of the given width. |
201 | bool isIntegerTy(unsigned Bitwidth) const; |
202 | |
203 | /// Return true if this is an integer type or a vector of integer types. |
204 | bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); } |
205 | |
206 | /// Return true if this is an integer type or a vector of integer types of |
207 | /// the given width; BitWidth is passed to isIntegerTy() on getScalarType(). |
208 | bool isIntOrIntVectorTy(unsigned BitWidth) const { |
209 | return getScalarType()->isIntegerTy(BitWidth); |
210 | } |
211 | |
212 | /// Return true if this is an integer type or a pointer type; the test is on this type itself, not on getScalarType(). |
213 | bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); } |
214 | |
215 | /// True if this is an instance of FunctionType, i.e. the type's ID is FunctionTyID. |
216 | bool isFunctionTy() const { return getTypeID() == FunctionTyID; } |
217 | |
218 | /// True if this is an instance of StructType, i.e. the type's ID is StructTyID. |
219 | bool isStructTy() const { return getTypeID() == StructTyID; } |
220 | |
221 | /// True if this is an instance of ArrayType, i.e. the type's ID is ArrayTyID. |
222 | bool isArrayTy() const { return getTypeID() == ArrayTyID; } |
223 | |
224 | /// True if this is an instance of PointerType, i.e. the type's ID is PointerTyID. |
225 | bool isPointerTy() const { return getTypeID() == PointerTyID; } |
226 | |
227 | /// Return true if this is a pointer type or a vector of pointer types; the test is isPointerTy() applied to getScalarType(). |
228 | bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); } |
229 | |
230 | /// True if this is an instance of VectorType, i.e. the type's ID is ScalableVectorTyID or FixedVectorTyID. |
231 | inline bool isVectorTy() const { |
232 | return getTypeID() == ScalableVectorTyID || getTypeID() == FixedVectorTyID; |
233 | } |
234 | |
235 | /// Return true if this type could be converted with a lossless BitCast to |
236 | /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the |
237 | /// same size only where no re-interpretation of the bits is done. |
238 | /// Determine if this type could be losslessly bitcast to Ty. |
239 | bool canLosslesslyBitCastTo(Type *Ty) const; |
240 | |
241 | /// Return true if this type is empty, that is, it has no elements or all of |
242 | /// its elements are empty. |
243 | bool isEmptyTy() const; |
244 | |
245 | /// Return true if the type is "first class", meaning it is a valid type for a |
246 | /// Value. Every type ID except FunctionTyID and VoidTyID qualifies. |
247 | bool isFirstClassType() const { |
248 | return getTypeID() != FunctionTyID && getTypeID() != VoidTyID; |
249 | } |
250 | |
251 | /// Return true if the type is a valid type for a register in codegen. Per the |
252 | /// check below: floating-point, X86 MMX, integer, pointer and vector types. |
253 | bool isSingleValueType() const { |
254 | return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() || |
255 | isPointerTy() || isVectorTy(); |
256 | } |
257 | |
258 | /// Return true if the type is an aggregate type. This means it is valid as |
259 | /// the first operand of an insertvalue or extractvalue instruction. This |
260 | /// includes struct and array types (StructTyID, ArrayTyID), but does not include vector types. |
261 | bool isAggregateType() const { |
262 | return getTypeID() == StructTyID || getTypeID() == ArrayTyID; |
263 | } |
264 | |
265 | /// Return true if it makes sense to take the size of this type. To get the |
266 | /// actual size for a particular target, it is reasonable to use the |
267 | /// DataLayout subsystem to do this. Visited is not inspected here; it is only forwarded to isSizedDerivedType(). |
268 | bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const { |
269 | // If it's a primitive, it is always sized. |
270 | if (getTypeID() == IntegerTyID || isFloatingPointTy() || |
271 | getTypeID() == PointerTyID || |
272 | getTypeID() == X86_MMXTyID) |
273 | return true; |
274 | // If it is not something that can have a size (e.g. a function or label), |
275 | // it doesn't have a size. |
276 | if (getTypeID() != StructTyID && getTypeID() != ArrayTyID && !isVectorTy()) |
277 | return false; |
278 | // Otherwise we have to try harder to decide. |
279 | return isSizedDerivedType(Visited); |
280 | } |
281 | |
282 | /// Return the basic size of this type if it is a primitive type. These are |
283 | /// fixed by LLVM and are not target-dependent. |
284 | /// This will return zero if the type does not have a size or is not a |
285 | /// primitive type. |
286 | /// |
287 | /// If this is a scalable vector type, the scalable property will be set and |
288 | /// the runtime size will be a positive integer multiple of the base size. |
289 | /// |
290 | /// Note that this may not reflect the size of memory allocated for an |
291 | /// instance of the type or the number of bytes that are written when an |
292 | /// instance of the type is stored to memory. The DataLayout class provides |
293 | /// additional query functions to provide this information. |
294 | /// |
295 | TypeSize getPrimitiveSizeInBits() const LLVM_READONLY__attribute__((__pure__)); |
296 | |
297 | /// If this is a vector type, return the getPrimitiveSizeInBits value for the |
298 | /// element type. Otherwise return the getPrimitiveSizeInBits value for this |
299 | /// type. |
300 | unsigned getScalarSizeInBits() const LLVM_READONLY__attribute__((__pure__)); |
301 | |
302 | /// Return the width of the mantissa of this type. This is only valid on |
303 | /// floating-point types. If the FP type does not have a stable mantissa (e.g. |
304 | /// ppc long double), this method returns -1. |
305 | int getFPMantissaWidth() const; |
306 | |
307 | /// If this is a vector type, return the element type (getContainedType(0)), otherwise return |
308 | /// 'this' with its constness cast away. |
309 | inline Type *getScalarType() const { |
310 | if (isVectorTy()) |
311 | return getContainedType(0); |
312 | return const_cast<Type *>(this); |
313 | } |
314 | |
315 | //===--------------------------------------------------------------------===// |
316 | // Type Iteration support: iterators over the ContainedTys array. |
317 | // subtype_begin()/subtype_end() delimit NumContainedTys entries, subtypes() wraps that range in an ArrayRef, and the reverse variants wrap the same bounds in std::reverse_iterator. |
318 | using subtype_iterator = Type * const *; |
319 | |
320 | subtype_iterator subtype_begin() const { return ContainedTys; } |
321 | subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];} |
322 | ArrayRef<Type*> subtypes() const { |
323 | return makeArrayRef(subtype_begin(), subtype_end()); |
324 | } |
325 | |
326 | using subtype_reverse_iterator = std::reverse_iterator<subtype_iterator>; |
327 | |
328 | subtype_reverse_iterator subtype_rbegin() const { |
329 | return subtype_reverse_iterator(subtype_end()); |
330 | } |
331 | subtype_reverse_iterator subtype_rend() const { |
332 | return subtype_reverse_iterator(subtype_begin()); |
333 | } |
334 | |
335 | /// This method is used to implement the type iterator (defined at the end of |
336 | /// the file). For derived types, this returns the types 'contained' in the |
337 | /// derived type. Asserts that i < NumContainedTys, then returns ContainedTys[i]. |
338 | Type *getContainedType(unsigned i) const { |
339 | assert(i < NumContainedTys && "Index out of range!")((i < NumContainedTys && "Index out of range!") ? static_cast <void> (0) : __assert_fail ("i < NumContainedTys && \"Index out of range!\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/Type.h" , 339, __PRETTY_FUNCTION__)); |
340 | return ContainedTys[i]; |
341 | } |
342 | |
343 | /// Return the number of types in the derived type (the value of NumContainedTys). |
344 | unsigned getNumContainedTypes() const { return NumContainedTys; } |
345 | |
346 | //===--------------------------------------------------------------------===// |
347 | // Helper methods corresponding to subclass methods. This forces a cast to |
348 | // the specified subclass and calls its accessor. "getArrayNumElements" (for |
349 | // example) is shorthand for cast<ArrayType>(Ty)->getNumElements(). This is |
350 | // only intended to cover the core methods that are frequently used; helper |
351 | // methods should not be added here. |
352 | |
353 | inline unsigned getIntegerBitWidth() const; |
354 | |
355 | inline Type *getFunctionParamType(unsigned i) const; |
356 | inline unsigned getFunctionNumParams() const; |
357 | inline bool isFunctionVarArg() const; |
358 | |
359 | inline StringRef getStructName() const; |
360 | inline unsigned getStructNumElements() const; |
361 | inline Type *getStructElementType(unsigned N) const; |
362 | |
363 | inline uint64_t getArrayNumElements() const; |
364 | |
/// Return the element type of an array type: asserts that the type's ID is
/// ArrayTyID, then returns ContainedTys[0].
365 | Type *getArrayElementType() const { |
366 | assert(getTypeID() == ArrayTyID)((getTypeID() == ArrayTyID) ? static_cast<void> (0) : __assert_fail ("getTypeID() == ArrayTyID", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/Type.h" , 366, __PRETTY_FUNCTION__)); |
367 | return ContainedTys[0]; |
368 | } |
369 | |
/// Return the pointee type of a pointer type: asserts that the type's ID is
/// PointerTyID, then returns ContainedTys[0].
370 | Type *getPointerElementType() const { |
371 | assert(getTypeID() == PointerTyID)((getTypeID() == PointerTyID) ? static_cast<void> (0) : __assert_fail ("getTypeID() == PointerTyID", "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/Type.h" , 371, __PRETTY_FUNCTION__)); |
372 | return ContainedTys[0]; |
373 | } |
374 | |
375 | /// Given an integer or vector type, change the lane bitwidth to NewBitwidth, |
376 | /// whilst keeping the old number of lanes. |
377 | inline Type *getWithNewBitWidth(unsigned NewBitWidth) const; |
378 | |
379 | /// Given scalar/vector integer type, returns a type with elements twice as |
380 | /// wide as in the original type. For vectors, preserves element count. |
381 | inline Type *getExtendedType() const; |
382 | |
383 | /// Get the address space of this pointer or pointer vector type. |
384 | inline unsigned getPointerAddressSpace() const; |
385 | |
386 | //===--------------------------------------------------------------------===// |
387 | // Static members exported by the Type class itself. Useful for getting |
388 | // instances of Type. |
389 | // |
390 | |
391 | /// Return a type based on an identifier. |
392 | static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber); |
393 | |
394 | //===--------------------------------------------------------------------===// |
395 | // These are the builtin types that are always available. |
396 | // |
397 | static Type *getVoidTy(LLVMContext &C); |
398 | static Type *getLabelTy(LLVMContext &C); |
399 | static Type *getHalfTy(LLVMContext &C); |
400 | static Type *getBFloatTy(LLVMContext &C); |
401 | static Type *getFloatTy(LLVMContext &C); |
402 | static Type *getDoubleTy(LLVMContext &C); |
403 | static Type *getMetadataTy(LLVMContext &C); |
404 | static Type *getX86_FP80Ty(LLVMContext &C); |
405 | static Type *getFP128Ty(LLVMContext &C); |
406 | static Type *getPPC_FP128Ty(LLVMContext &C); |
407 | static Type *getX86_MMXTy(LLVMContext &C); |
408 | static Type *getTokenTy(LLVMContext &C); |
409 | static IntegerType *getIntNTy(LLVMContext &C, unsigned N); |
410 | static IntegerType *getInt1Ty(LLVMContext &C); |
411 | static IntegerType *getInt8Ty(LLVMContext &C); |
412 | static IntegerType *getInt16Ty(LLVMContext &C); |
413 | static IntegerType *getInt32Ty(LLVMContext &C); |
414 | static IntegerType *getInt64Ty(LLVMContext &C); |
415 | static IntegerType *getInt128Ty(LLVMContext &C); |
/// Map the C++ scalar type ScalarTy to an LLVM type in context C. An integral
/// ScalarTy yields getIntNTy(C, noOfBits); a floating-point ScalarTy yields
/// the float type for 32 bits and the double type for 64 bits. Every other
/// case falls through to llvm_unreachable.
// NOTE(review): `CHAR_BIT8` below is presumably the CHAR_BIT macro followed by the report's rendering of its expansion (8); confirm against Type.h.
416 | template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) { |
417 | int noOfBits = sizeof(ScalarTy) * CHAR_BIT8; |
418 | if (std::is_integral<ScalarTy>::value) { |
419 | return (Type*) Type::getIntNTy(C, noOfBits); |
420 | } else if (std::is_floating_point<ScalarTy>::value) { |
421 | switch (noOfBits) { |
422 | case 32: |
423 | return Type::getFloatTy(C); |
424 | case 64: |
425 | return Type::getDoubleTy(C); |
426 | } |
427 | } |
428 | llvm_unreachable("Unsupported type in Type::getScalarTy")::llvm::llvm_unreachable_internal("Unsupported type in Type::getScalarTy" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/Type.h" , 428); |
429 | } |
/// Return the floating-point Type in context C that matches the semantics S.
/// S is matched by address against the APFloat semantics accessors in the
/// order shown; if none of the first six match, S is asserted to be
/// PPCDoubleDouble and the PPC_FP128 type is returned.
430 | static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S) { |
431 | Type *Ty; |
432 | if (&S == &APFloat::IEEEhalf()) |
433 | Ty = Type::getHalfTy(C); |
434 | else if (&S == &APFloat::BFloat()) |
435 | Ty = Type::getBFloatTy(C); |
436 | else if (&S == &APFloat::IEEEsingle()) |
437 | Ty = Type::getFloatTy(C); |
438 | else if (&S == &APFloat::IEEEdouble()) |
439 | Ty = Type::getDoubleTy(C); |
440 | else if (&S == &APFloat::x87DoubleExtended()) |
441 | Ty = Type::getX86_FP80Ty(C); |
442 | else if (&S == &APFloat::IEEEquad()) |
443 | Ty = Type::getFP128Ty(C); |
444 | else { |
445 | assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format")((&S == &APFloat::PPCDoubleDouble() && "Unknown FP format" ) ? static_cast<void> (0) : __assert_fail ("&S == &APFloat::PPCDoubleDouble() && \"Unknown FP format\"" , "/build/llvm-toolchain-snapshot-12.0.0~++20201102111116+1ed2ca68191/llvm/include/llvm/IR/Type.h" , 445, __PRETTY_FUNCTION__)); |
446 | Ty = Type::getPPC_FP128Ty(C); |
447 | } |
448 | return Ty; |
449 | } |
450 | |
451 | //===--------------------------------------------------------------------===// |
452 | // Convenience methods for getting pointer types with one of the above builtin |
453 | // types as pointee. |
454 | // |
455 | static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0); |
456 | static PointerType *getBFloatPtrTy(LLVMContext &C, unsigned AS = 0); |
457 | static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0); |
458 | static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0); |
459 | static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0); |
460 | static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0); |
461 | static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0); |
462 | static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0); |
463 | static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0); |
464 | static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0); |
465 | static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0); |
466 | static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0); |
467 | static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0); |
468 | static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0); |
469 | |
470 | /// Return a pointer to the current type. This is equivalent to |
471 | /// PointerType::get(Foo, AddrSpace). |
472 | PointerType *getPointerTo(unsigned AddrSpace = 0) const; |
473 | |
474 | private: |
475 | /// Derived types like structures and arrays are sized iff all of the members |
476 | /// of the type are sized as well. Since asking for their size is relatively |
477 | /// uncommon, move this operation out-of-line. |
478 | bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const; |
479 | }; |
480 | |
481 | // Printing of types: stream insertion calls T.print(OS) and returns OS so insertions can be chained. |
482 | inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) { |
483 | T.print(OS); |
484 | return OS; |
485 | } |
486 | |
487 | // Allow isa<PointerType>(x) to work without DerivedTypes.h included: doit() only compares the type's ID with Type::PointerTyID. |
488 | template <> struct isa_impl<PointerType, Type> { |
489 | static inline bool doit(const Type &Ty) { |
490 | return Ty.getTypeID() == Type::PointerTyID; |
491 | } |
492 | }; |
493 | |
494 | // Create wrappers for C Binding types (see CBindingWrapping.h). The text after the macro call appears to be its rendered expansion: unwrap(LLVMTypeRef) -> Type*, wrap(const Type*) -> LLVMTypeRef, and unwrap<T>(LLVMTypeRef) via cast<T>. |
495 | DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)inline Type *unwrap(LLVMTypeRef P) { return reinterpret_cast< Type*>(P); } inline LLVMTypeRef wrap(const Type *P) { return reinterpret_cast<LLVMTypeRef>(const_cast<Type*>( P)); } template<typename T> inline T *unwrap(LLVMTypeRef P) { return cast<T>(unwrap(P)); } |
496 | |
497 | /* Specialized opaque type conversions. |
498 | */ |
// Reinterpret a pointer to LLVMTypeRef as a pointer to Type*; this is a pointer cast only.
499 | inline Type **unwrap(LLVMTypeRef* Tys) { |
500 | return reinterpret_cast<Type**>(Tys); |
501 | } |
502 | |
// Reinterpret a pointer to Type* as a pointer to LLVMTypeRef; this is a pointer cast only.
// NOTE(review): the inner const_cast<Type**> looks redundant since Tys is already declared Type**; confirm before removing.
503 | inline LLVMTypeRef *wrap(Type **Tys) { |
504 | return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys)); |
505 | } |
506 | |
507 | } // end namespace llvm |
508 | |
509 | #endif // LLVM_IR_TYPE_H |