File: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Warning: line 2767, column 48: Called C++ object pointer is null
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // OpenMP specific optimizations: | ||||
10 | // | ||||
11 | // - Deduplication of runtime calls, e.g., omp_get_thread_num. | ||||
12 | // - Replacing globalized device memory with stack memory. | ||||
13 | // - Replacing globalized device memory with shared memory. | ||||
14 | // - Parallel region merging. | ||||
15 | // - Transforming generic-mode device kernels to SPMD mode. | ||||
16 | // - Specializing the state machine for generic-mode device kernels. | ||||
17 | // | ||||
18 | //===----------------------------------------------------------------------===// | ||||
19 | |||||
20 | #include "llvm/Transforms/IPO/OpenMPOpt.h" | ||||
21 | |||||
22 | #include "llvm/ADT/EnumeratedArray.h" | ||||
23 | #include "llvm/ADT/PostOrderIterator.h" | ||||
24 | #include "llvm/ADT/SetVector.h" | ||||
25 | #include "llvm/ADT/Statistic.h" | ||||
26 | #include "llvm/ADT/StringRef.h" | ||||
27 | #include "llvm/Analysis/CallGraph.h" | ||||
28 | #include "llvm/Analysis/CallGraphSCCPass.h" | ||||
29 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||||
30 | #include "llvm/Analysis/ValueTracking.h" | ||||
31 | #include "llvm/Frontend/OpenMP/OMPConstants.h" | ||||
32 | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" | ||||
33 | #include "llvm/IR/Assumptions.h" | ||||
34 | #include "llvm/IR/DiagnosticInfo.h" | ||||
35 | #include "llvm/IR/GlobalValue.h" | ||||
36 | #include "llvm/IR/Instruction.h" | ||||
37 | #include "llvm/IR/IntrinsicInst.h" | ||||
38 | #include "llvm/IR/IntrinsicsAMDGPU.h" | ||||
39 | #include "llvm/IR/IntrinsicsNVPTX.h" | ||||
40 | #include "llvm/InitializePasses.h" | ||||
41 | #include "llvm/Support/CommandLine.h" | ||||
42 | #include "llvm/Transforms/IPO.h" | ||||
43 | #include "llvm/Transforms/IPO/Attributor.h" | ||||
44 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||
45 | #include "llvm/Transforms/Utils/CallGraphUpdater.h" | ||||
46 | #include "llvm/Transforms/Utils/CodeExtractor.h" | ||||
47 | |||||
48 | #include <algorithm> | ||||
49 | |||||
50 | using namespace llvm; | ||||
51 | using namespace omp; | ||||
52 | |||||
// Debug category used by LLVM_DEBUG/-debug-only and the STATISTIC macros.
#define DEBUG_TYPE "openmp-opt"
54 | |||||
// Command-line options for OpenMP-Opt. All are hidden and default to
// conservative settings; they exist for debugging, experimentation, and
// working around problematic transformations.

static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::ZeroOrMore,
    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> EnableParallelRegionMerging(
    "openmp-opt-enable-merging", cl::ZeroOrMore,
    cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
                           cl::desc("Disable function internalization."),
                           cl::Hidden, cl::init(false));

// Debug-printing switches (ICV values and identified GPU kernels).
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                    cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                        cl::init(false), cl::Hidden);

static cl::opt<bool> HideMemoryTransferLatency(
    "openmp-hide-memory-transfer-latency",
    cl::desc("[WIP] Tries to hide the latency of host to device memory"
             " transfers"),
    cl::Hidden, cl::init(false));

// Per-optimization kill switches.
static cl::opt<bool> DisableOpenMPOptDeglobalization(
    "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
    cl::desc("Disable OpenMP optimizations involving deglobalization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptSPMDization(
    "openmp-opt-disable-spmdization", cl::ZeroOrMore,
    cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptFolding(
    "openmp-opt-disable-folding", cl::ZeroOrMore,
    cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
    "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
    cl::desc("Disable OpenMP optimizations that replace the state machine."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> PrintModuleAfterOptimizations(
    "openmp-opt-print-module", cl::ZeroOrMore,
    cl::desc("Print the current module after OpenMP optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> AlwaysInlineDeviceFunctions(
    "openmp-opt-inline-device", cl::ZeroOrMore,
    cl::desc("Inline all applicible functions on the device."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
                         cl::desc("Enables more verbose remarks."), cl::Hidden,
                         cl::init(false));

// Upper bound on Attributor fixpoint iterations used by this pass.
static cl::opt<unsigned>
    SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
                          cl::desc("Maximal number of attributor iterations."),
                          cl::init(256));
120 | |||||
121 | STATISTIC(NumOpenMPRuntimeCallsDeduplicated,static llvm::Statistic NumOpenMPRuntimeCallsDeduplicated = {"openmp-opt" , "NumOpenMPRuntimeCallsDeduplicated", "Number of OpenMP runtime calls deduplicated" } | ||||
122 | "Number of OpenMP runtime calls deduplicated")static llvm::Statistic NumOpenMPRuntimeCallsDeduplicated = {"openmp-opt" , "NumOpenMPRuntimeCallsDeduplicated", "Number of OpenMP runtime calls deduplicated" }; | ||||
123 | STATISTIC(NumOpenMPParallelRegionsDeleted,static llvm::Statistic NumOpenMPParallelRegionsDeleted = {"openmp-opt" , "NumOpenMPParallelRegionsDeleted", "Number of OpenMP parallel regions deleted" } | ||||
124 | "Number of OpenMP parallel regions deleted")static llvm::Statistic NumOpenMPParallelRegionsDeleted = {"openmp-opt" , "NumOpenMPParallelRegionsDeleted", "Number of OpenMP parallel regions deleted" }; | ||||
125 | STATISTIC(NumOpenMPRuntimeFunctionsIdentified,static llvm::Statistic NumOpenMPRuntimeFunctionsIdentified = { "openmp-opt", "NumOpenMPRuntimeFunctionsIdentified", "Number of OpenMP runtime functions identified" } | ||||
126 | "Number of OpenMP runtime functions identified")static llvm::Statistic NumOpenMPRuntimeFunctionsIdentified = { "openmp-opt", "NumOpenMPRuntimeFunctionsIdentified", "Number of OpenMP runtime functions identified" }; | ||||
127 | STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,static llvm::Statistic NumOpenMPRuntimeFunctionUsesIdentified = {"openmp-opt", "NumOpenMPRuntimeFunctionUsesIdentified", "Number of OpenMP runtime function uses identified" } | ||||
128 | "Number of OpenMP runtime function uses identified")static llvm::Statistic NumOpenMPRuntimeFunctionUsesIdentified = {"openmp-opt", "NumOpenMPRuntimeFunctionUsesIdentified", "Number of OpenMP runtime function uses identified" }; | ||||
129 | STATISTIC(NumOpenMPTargetRegionKernels,static llvm::Statistic NumOpenMPTargetRegionKernels = {"openmp-opt" , "NumOpenMPTargetRegionKernels", "Number of OpenMP target region entry points (=kernels) identified" } | ||||
130 | "Number of OpenMP target region entry points (=kernels) identified")static llvm::Statistic NumOpenMPTargetRegionKernels = {"openmp-opt" , "NumOpenMPTargetRegionKernels", "Number of OpenMP target region entry points (=kernels) identified" }; | ||||
131 | STATISTIC(NumOpenMPTargetRegionKernelsSPMD,static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt" , "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"} | ||||
132 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt" , "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"} | ||||
133 | "SPMD-mode instead of generic-mode")static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt" , "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"}; | ||||
134 | STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine = {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"} | ||||
135 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine = {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"} | ||||
136 | "generic-mode without a state machines")static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine = {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"}; | ||||
137 | STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"} | ||||
138 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"} | ||||
139 | "generic-mode with customized state machines with fallback")static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"}; | ||||
140 | STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback" } | ||||
141 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback" } | ||||
142 | "generic-mode with customized state machines without fallback")static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback" }; | ||||
143 | STATISTIC(static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine = {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine" , "Number of OpenMP parallel regions replaced with ID in GPU state machines" } | ||||
144 | NumOpenMPParallelRegionsReplacedInGPUStateMachine,static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine = {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine" , "Number of OpenMP parallel regions replaced with ID in GPU state machines" } | ||||
145 | "Number of OpenMP parallel regions replaced with ID in GPU state machines")static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine = {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine" , "Number of OpenMP parallel regions replaced with ID in GPU state machines" }; | ||||
146 | STATISTIC(NumOpenMPParallelRegionsMerged,static llvm::Statistic NumOpenMPParallelRegionsMerged = {"openmp-opt" , "NumOpenMPParallelRegionsMerged", "Number of OpenMP parallel regions merged" } | ||||
147 | "Number of OpenMP parallel regions merged")static llvm::Statistic NumOpenMPParallelRegionsMerged = {"openmp-opt" , "NumOpenMPParallelRegionsMerged", "Number of OpenMP parallel regions merged" }; | ||||
148 | STATISTIC(NumBytesMovedToSharedMemory,static llvm::Statistic NumBytesMovedToSharedMemory = {"openmp-opt" , "NumBytesMovedToSharedMemory", "Amount of memory pushed to shared memory" } | ||||
149 | "Amount of memory pushed to shared memory")static llvm::Statistic NumBytesMovedToSharedMemory = {"openmp-opt" , "NumBytesMovedToSharedMemory", "Amount of memory pushed to shared memory" }; | ||||
150 | |||||
151 | #if !defined(NDEBUG) | ||||
152 | static constexpr auto TAG = "[" DEBUG_TYPE"openmp-opt" "]"; | ||||
153 | #endif | ||||
154 | |||||
namespace {

// Forward declaration of the abstract attribute that replaces globalized
// device memory with shared memory; referenced before its definition.
struct AAHeapToShared;

// Forward declaration of the abstract attribute that tracks internal control
// variable (ICV) values; referenced before its definition.
struct AAICVTracker;
160 | |||||
161 | /// OpenMP specific information. For now, stores RFIs and ICVs also needed for | ||||
162 | /// Attributor runs. | ||||
163 | struct OMPInformationCache : public InformationCache { | ||||
164 | OMPInformationCache(Module &M, AnalysisGetter &AG, | ||||
165 | BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, | ||||
166 | SmallPtrSetImpl<Kernel> &Kernels) | ||||
167 | : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), | ||||
168 | Kernels(Kernels) { | ||||
169 | |||||
170 | OMPBuilder.initialize(); | ||||
171 | initializeRuntimeFunctions(); | ||||
172 | initializeInternalControlVars(); | ||||
173 | } | ||||
174 | |||||
175 | /// Generic information that describes an internal control variable. | ||||
176 | struct InternalControlVarInfo { | ||||
177 | /// The kind, as described by InternalControlVar enum. | ||||
178 | InternalControlVar Kind; | ||||
179 | |||||
180 | /// The name of the ICV. | ||||
181 | StringRef Name; | ||||
182 | |||||
183 | /// Environment variable associated with this ICV. | ||||
184 | StringRef EnvVarName; | ||||
185 | |||||
186 | /// Initial value kind. | ||||
187 | ICVInitValue InitKind; | ||||
188 | |||||
189 | /// Initial value. | ||||
190 | ConstantInt *InitValue; | ||||
191 | |||||
192 | /// Setter RTL function associated with this ICV. | ||||
193 | RuntimeFunction Setter; | ||||
194 | |||||
195 | /// Getter RTL function associated with this ICV. | ||||
196 | RuntimeFunction Getter; | ||||
197 | |||||
198 | /// RTL Function corresponding to the override clause of this ICV | ||||
199 | RuntimeFunction Clause; | ||||
200 | }; | ||||
201 | |||||
202 | /// Generic information that describes a runtime function | ||||
203 | struct RuntimeFunctionInfo { | ||||
204 | |||||
205 | /// The kind, as described by the RuntimeFunction enum. | ||||
206 | RuntimeFunction Kind; | ||||
207 | |||||
208 | /// The name of the function. | ||||
209 | StringRef Name; | ||||
210 | |||||
211 | /// Flag to indicate a variadic function. | ||||
212 | bool IsVarArg; | ||||
213 | |||||
214 | /// The return type of the function. | ||||
215 | Type *ReturnType; | ||||
216 | |||||
217 | /// The argument types of the function. | ||||
218 | SmallVector<Type *, 8> ArgumentTypes; | ||||
219 | |||||
220 | /// The declaration if available. | ||||
221 | Function *Declaration = nullptr; | ||||
222 | |||||
223 | /// Uses of this runtime function per function containing the use. | ||||
224 | using UseVector = SmallVector<Use *, 16>; | ||||
225 | |||||
226 | /// Clear UsesMap for runtime function. | ||||
227 | void clearUsesMap() { UsesMap.clear(); } | ||||
228 | |||||
229 | /// Boolean conversion that is true if the runtime function was found. | ||||
230 | operator bool() const { return Declaration; } | ||||
231 | |||||
232 | /// Return the vector of uses in function \p F. | ||||
233 | UseVector &getOrCreateUseVector(Function *F) { | ||||
234 | std::shared_ptr<UseVector> &UV = UsesMap[F]; | ||||
235 | if (!UV) | ||||
236 | UV = std::make_shared<UseVector>(); | ||||
237 | return *UV; | ||||
238 | } | ||||
239 | |||||
240 | /// Return the vector of uses in function \p F or `nullptr` if there are | ||||
241 | /// none. | ||||
242 | const UseVector *getUseVector(Function &F) const { | ||||
243 | auto I = UsesMap.find(&F); | ||||
244 | if (I != UsesMap.end()) | ||||
245 | return I->second.get(); | ||||
246 | return nullptr; | ||||
247 | } | ||||
248 | |||||
249 | /// Return how many functions contain uses of this runtime function. | ||||
250 | size_t getNumFunctionsWithUses() const { return UsesMap.size(); } | ||||
251 | |||||
252 | /// Return the number of arguments (or the minimal number for variadic | ||||
253 | /// functions). | ||||
254 | size_t getNumArgs() const { return ArgumentTypes.size(); } | ||||
255 | |||||
256 | /// Run the callback \p CB on each use and forget the use if the result is | ||||
257 | /// true. The callback will be fed the function in which the use was | ||||
258 | /// encountered as second argument. | ||||
259 | void foreachUse(SmallVectorImpl<Function *> &SCC, | ||||
260 | function_ref<bool(Use &, Function &)> CB) { | ||||
261 | for (Function *F : SCC) | ||||
262 | foreachUse(CB, F); | ||||
263 | } | ||||
264 | |||||
265 | /// Run the callback \p CB on each use within the function \p F and forget | ||||
266 | /// the use if the result is true. | ||||
267 | void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { | ||||
268 | SmallVector<unsigned, 8> ToBeDeleted; | ||||
269 | ToBeDeleted.clear(); | ||||
270 | |||||
271 | unsigned Idx = 0; | ||||
272 | UseVector &UV = getOrCreateUseVector(F); | ||||
273 | |||||
274 | for (Use *U : UV) { | ||||
275 | if (CB(*U, *F)) | ||||
276 | ToBeDeleted.push_back(Idx); | ||||
277 | ++Idx; | ||||
278 | } | ||||
279 | |||||
280 | // Remove the to-be-deleted indices in reverse order as prior | ||||
281 | // modifications will not modify the smaller indices. | ||||
282 | while (!ToBeDeleted.empty()) { | ||||
283 | unsigned Idx = ToBeDeleted.pop_back_val(); | ||||
284 | UV[Idx] = UV.back(); | ||||
285 | UV.pop_back(); | ||||
286 | } | ||||
287 | } | ||||
288 | |||||
289 | private: | ||||
290 | /// Map from functions to all uses of this runtime function contained in | ||||
291 | /// them. | ||||
292 | DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; | ||||
293 | |||||
294 | public: | ||||
295 | /// Iterators for the uses of this runtime function. | ||||
296 | decltype(UsesMap)::iterator begin() { return UsesMap.begin(); } | ||||
297 | decltype(UsesMap)::iterator end() { return UsesMap.end(); } | ||||
298 | }; | ||||
299 | |||||
300 | /// An OpenMP-IR-Builder instance | ||||
301 | OpenMPIRBuilder OMPBuilder; | ||||
302 | |||||
303 | /// Map from runtime function kind to the runtime function description. | ||||
304 | EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, | ||||
305 | RuntimeFunction::OMPRTL___last> | ||||
306 | RFIs; | ||||
307 | |||||
308 | /// Map from function declarations/definitions to their runtime enum type. | ||||
309 | DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap; | ||||
310 | |||||
311 | /// Map from ICV kind to the ICV description. | ||||
312 | EnumeratedArray<InternalControlVarInfo, InternalControlVar, | ||||
313 | InternalControlVar::ICV___last> | ||||
314 | ICVs; | ||||
315 | |||||
316 | /// Helper to initialize all internal control variable information for those | ||||
317 | /// defined in OMPKinds.def. | ||||
318 | void initializeInternalControlVars() { | ||||
319 | #define ICV_RT_SET(_Name, RTL) \ | ||||
320 | { \ | ||||
321 | auto &ICV = ICVs[_Name]; \ | ||||
322 | ICV.Setter = RTL; \ | ||||
323 | } | ||||
324 | #define ICV_RT_GET(Name, RTL) \ | ||||
325 | { \ | ||||
326 | auto &ICV = ICVs[Name]; \ | ||||
327 | ICV.Getter = RTL; \ | ||||
328 | } | ||||
329 | #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ | ||||
330 | { \ | ||||
331 | auto &ICV = ICVs[Enum]; \ | ||||
332 | ICV.Name = _Name; \ | ||||
333 | ICV.Kind = Enum; \ | ||||
334 | ICV.InitKind = Init; \ | ||||
335 | ICV.EnvVarName = _EnvVarName; \ | ||||
336 | switch (ICV.InitKind) { \ | ||||
337 | case ICV_IMPLEMENTATION_DEFINED: \ | ||||
338 | ICV.InitValue = nullptr; \ | ||||
339 | break; \ | ||||
340 | case ICV_ZERO: \ | ||||
341 | ICV.InitValue = ConstantInt::get( \ | ||||
342 | Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ | ||||
343 | break; \ | ||||
344 | case ICV_FALSE: \ | ||||
345 | ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ | ||||
346 | break; \ | ||||
347 | case ICV_LAST: \ | ||||
348 | break; \ | ||||
349 | } \ | ||||
350 | } | ||||
351 | #include "llvm/Frontend/OpenMP/OMPKinds.def" | ||||
352 | } | ||||
353 | |||||
354 | /// Returns true if the function declaration \p F matches the runtime | ||||
355 | /// function types, that is, return type \p RTFRetType, and argument types | ||||
356 | /// \p RTFArgTypes. | ||||
357 | static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, | ||||
358 | SmallVector<Type *, 8> &RTFArgTypes) { | ||||
359 | // TODO: We should output information to the user (under debug output | ||||
360 | // and via remarks). | ||||
361 | |||||
362 | if (!F) | ||||
363 | return false; | ||||
364 | if (F->getReturnType() != RTFRetType) | ||||
365 | return false; | ||||
366 | if (F->arg_size() != RTFArgTypes.size()) | ||||
367 | return false; | ||||
368 | |||||
369 | auto *RTFTyIt = RTFArgTypes.begin(); | ||||
370 | for (Argument &Arg : F->args()) { | ||||
371 | if (Arg.getType() != *RTFTyIt) | ||||
372 | return false; | ||||
373 | |||||
374 | ++RTFTyIt; | ||||
375 | } | ||||
376 | |||||
377 | return true; | ||||
378 | } | ||||
379 | |||||
380 | // Helper to collect all uses of the declaration in the UsesMap. | ||||
381 | unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { | ||||
382 | unsigned NumUses = 0; | ||||
383 | if (!RFI.Declaration) | ||||
384 | return NumUses; | ||||
385 | OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); | ||||
386 | |||||
387 | if (CollectStats) { | ||||
388 | NumOpenMPRuntimeFunctionsIdentified += 1; | ||||
389 | NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); | ||||
390 | } | ||||
391 | |||||
392 | // TODO: We directly convert uses into proper calls and unknown uses. | ||||
393 | for (Use &U : RFI.Declaration->uses()) { | ||||
394 | if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { | ||||
395 | if (ModuleSlice.count(UserI->getFunction())) { | ||||
396 | RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); | ||||
397 | ++NumUses; | ||||
398 | } | ||||
399 | } else { | ||||
400 | RFI.getOrCreateUseVector(nullptr).push_back(&U); | ||||
401 | ++NumUses; | ||||
402 | } | ||||
403 | } | ||||
404 | return NumUses; | ||||
405 | } | ||||
406 | |||||
407 | // Helper function to recollect uses of a runtime function. | ||||
408 | void recollectUsesForFunction(RuntimeFunction RTF) { | ||||
409 | auto &RFI = RFIs[RTF]; | ||||
410 | RFI.clearUsesMap(); | ||||
411 | collectUses(RFI, /*CollectStats*/ false); | ||||
412 | } | ||||
413 | |||||
414 | // Helper function to recollect uses of all runtime functions. | ||||
415 | void recollectUses() { | ||||
416 | for (int Idx = 0; Idx < RFIs.size(); ++Idx) | ||||
417 | recollectUsesForFunction(static_cast<RuntimeFunction>(Idx)); | ||||
418 | } | ||||
419 | |||||
420 | /// Helper to initialize all runtime function information for those defined | ||||
421 | /// in OpenMPKinds.def. | ||||
422 | void initializeRuntimeFunctions() { | ||||
423 | Module &M = *((*ModuleSlice.begin())->getParent()); | ||||
424 | |||||
425 | // Helper macros for handling __VA_ARGS__ in OMP_RTL | ||||
426 | #define OMP_TYPE(VarName, ...) \ | ||||
427 | Type *VarName = OMPBuilder.VarName; \ | ||||
428 | (void)VarName; | ||||
429 | |||||
430 | #define OMP_ARRAY_TYPE(VarName, ...) \ | ||||
431 | ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ | ||||
432 | (void)VarName##Ty; \ | ||||
433 | PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ | ||||
434 | (void)VarName##PtrTy; | ||||
435 | |||||
436 | #define OMP_FUNCTION_TYPE(VarName, ...) \ | ||||
437 | FunctionType *VarName = OMPBuilder.VarName; \ | ||||
438 | (void)VarName; \ | ||||
439 | PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ | ||||
440 | (void)VarName##Ptr; | ||||
441 | |||||
442 | #define OMP_STRUCT_TYPE(VarName, ...) \ | ||||
443 | StructType *VarName = OMPBuilder.VarName; \ | ||||
444 | (void)VarName; \ | ||||
445 | PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ | ||||
446 | (void)VarName##Ptr; | ||||
447 | |||||
448 | #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ | ||||
449 | { \ | ||||
450 | SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ | ||||
451 | Function *F = M.getFunction(_Name); \ | ||||
452 | RTLFunctions.insert(F); \ | ||||
453 | if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ | ||||
454 | RuntimeFunctionIDMap[F] = _Enum; \ | ||||
455 | F->removeFnAttr(Attribute::NoInline); \ | ||||
456 | auto &RFI = RFIs[_Enum]; \ | ||||
457 | RFI.Kind = _Enum; \ | ||||
458 | RFI.Name = _Name; \ | ||||
459 | RFI.IsVarArg = _IsVarArg; \ | ||||
460 | RFI.ReturnType = OMPBuilder._ReturnType; \ | ||||
461 | RFI.ArgumentTypes = std::move(ArgsTypes); \ | ||||
462 | RFI.Declaration = F; \ | ||||
463 | unsigned NumUses = collectUses(RFI); \ | ||||
464 | (void)NumUses; \ | ||||
465 | LLVM_DEBUG({ \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | ||||
466 | dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | ||||
467 | << " found\n"; \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | ||||
468 | if (RFI.Declaration) \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | ||||
469 | dbgs() << TAG << "-> got " << NumUses << " uses in " \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | ||||
470 | << RFI.getNumFunctionsWithUses() \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | ||||
471 | << " different functions.\n"; \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | ||||
472 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false); \ | ||||
473 | } \ | ||||
474 | } | ||||
475 | #include "llvm/Frontend/OpenMP/OMPKinds.def" | ||||
476 | |||||
477 | // TODO: We should attach the attributes defined in OMPKinds.def. | ||||
478 | } | ||||
479 | |||||
480 | /// Collection of known kernels (\see Kernel) in the module. | ||||
481 | SmallPtrSetImpl<Kernel> &Kernels; | ||||
482 | |||||
483 | /// Collection of known OpenMP runtime functions.. | ||||
484 | DenseSet<const Function *> RTLFunctions; | ||||
485 | }; | ||||
486 | |||||
487 | template <typename Ty, bool InsertInvalidates = true> | ||||
488 | struct BooleanStateWithSetVector : public BooleanState { | ||||
489 | bool contains(const Ty &Elem) const { return Set.contains(Elem); } | ||||
490 | bool insert(const Ty &Elem) { | ||||
491 | if (InsertInvalidates) | ||||
492 | BooleanState::indicatePessimisticFixpoint(); | ||||
493 | return Set.insert(Elem); | ||||
494 | } | ||||
495 | |||||
496 | const Ty &operator[](int Idx) const { return Set[Idx]; } | ||||
497 | bool operator==(const BooleanStateWithSetVector &RHS) const { | ||||
498 | return BooleanState::operator==(RHS) && Set == RHS.Set; | ||||
499 | } | ||||
500 | bool operator!=(const BooleanStateWithSetVector &RHS) const { | ||||
501 | return !(*this == RHS); | ||||
502 | } | ||||
503 | |||||
504 | bool empty() const { return Set.empty(); } | ||||
505 | size_t size() const { return Set.size(); } | ||||
506 | |||||
507 | /// "Clamp" this state with \p RHS. | ||||
508 | BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) { | ||||
509 | BooleanState::operator^=(RHS); | ||||
510 | Set.insert(RHS.Set.begin(), RHS.Set.end()); | ||||
511 | return *this; | ||||
512 | } | ||||
513 | |||||
514 | private: | ||||
515 | /// A set to keep track of elements. | ||||
516 | SetVector<Ty> Set; | ||||
517 | |||||
518 | public: | ||||
519 | typename decltype(Set)::iterator begin() { return Set.begin(); } | ||||
520 | typename decltype(Set)::iterator end() { return Set.end(); } | ||||
521 | typename decltype(Set)::const_iterator begin() const { return Set.begin(); } | ||||
522 | typename decltype(Set)::const_iterator end() const { return Set.end(); } | ||||
523 | }; | ||||
524 | |||||
/// Convenience alias: a BooleanStateWithSetVector whose elements are pointers
/// to \p Ty.
template <typename Ty, bool InsertInvalidates = true>
using BooleanStateWithPtrSetVector =
    BooleanStateWithSetVector<Ty *, InsertInvalidates>;
528 | |||||
/// Abstract Attributor state describing what is known about an OpenMP
/// (device) kernel: which parallel regions it may reach, whether it is
/// SPMD-compatible, and its init/deinit runtime calls.
struct KernelInfoState : AbstractState {
  /// Flag to track if we reached a fixpoint.
  bool IsAtFixpoint = false;

  /// The parallel regions (identified by the outlined parallel functions) that
  /// can be reached from the associated function.
  BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
      ReachedKnownParallelRegions;

  /// State to track what parallel region we might reach.
  BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;

  /// State to track if we are in SPMD-mode, assumed or know, and why we decided
  /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
  /// false.
  BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;

  /// The __kmpc_target_init call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelInitCB = nullptr;

  /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelDeinitCB = nullptr;

  /// Flag to indicate if the associated function is a kernel entry.
  bool IsKernelEntry = false;

  /// State to track what kernel entries can reach the associated function.
  BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;

  /// State to indicate if we can track parallel level of the associated
  /// function. We will give up tracking if we encounter unknown caller or the
  /// caller is __kmpc_parallel_51.
  BooleanStateWithSetVector<uint8_t> ParallelLevels;

  /// Abstract State interface
  ///{

  /// Default constructor: the optimistic "best" state.
  KernelInfoState() {}
  /// Construct the best state when \p BestState is true, otherwise the
  /// pessimistic "worst" state.
  KernelInfoState(bool BestState) {
    if (!BestState)
      indicatePessimisticFixpoint();
  }

  /// See AbstractState::isValidState(...)
  bool isValidState() const override { return true; }

  /// See AbstractState::isAtFixpoint(...)
  bool isAtFixpoint() const override { return IsAtFixpoint; }

  /// See AbstractState::indicatePessimisticFixpoint(...)
  // NOTE(review): ParallelLevels is not pessimized here while the other four
  // sub-states are — confirm this asymmetry is intentional.
  ChangeStatus indicatePessimisticFixpoint() override {
    IsAtFixpoint = true;
    ReachingKernelEntries.indicatePessimisticFixpoint();
    SPMDCompatibilityTracker.indicatePessimisticFixpoint();
    ReachedKnownParallelRegions.indicatePessimisticFixpoint();
    ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
    return ChangeStatus::CHANGED;
  }

  /// See AbstractState::indicateOptimisticFixpoint(...)
  ChangeStatus indicateOptimisticFixpoint() override {
    IsAtFixpoint = true;
    ReachingKernelEntries.indicateOptimisticFixpoint();
    SPMDCompatibilityTracker.indicateOptimisticFixpoint();
    ReachedKnownParallelRegions.indicateOptimisticFixpoint();
    ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  /// Return the assumed state
  KernelInfoState &getAssumed() { return *this; }
  const KernelInfoState &getAssumed() const { return *this; }

  /// Equality over the four tracked sub-states.
  // NOTE(review): KernelInitCB/KernelDeinitCB, IsKernelEntry, and
  // ParallelLevels do not participate in the comparison — confirm intentional.
  bool operator==(const KernelInfoState &RHS) const {
    if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
      return false;
    if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
      return false;
    if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
      return false;
    if (ReachingKernelEntries != RHS.ReachingKernelEntries)
      return false;
    return true;
  }

  /// Returns true if this kernel contains any OpenMP parallel regions.
  bool mayContainParallelRegion() {
    return !ReachedKnownParallelRegions.empty() ||
           !ReachedUnknownParallelRegions.empty();
  }

  /// Return empty set as the best state of potential values.
  static KernelInfoState getBestState() { return KernelInfoState(true); }

  static KernelInfoState getBestState(KernelInfoState &KIS) {
    return getBestState();
  }

  /// Return full set as the worst state of potential values.
  static KernelInfoState getWorstState() { return KernelInfoState(false); }

  /// "Clamp" this state with \p KIS.
  // NOTE(review): returns by value, unlike the conventional reference-returning
  // compound assignment; also ParallelLevels is not merged here — confirm.
  KernelInfoState operator^=(const KernelInfoState &KIS) {
    // Do not merge two different _init and _deinit call sites.
    if (KIS.KernelInitCB) {
      if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 638 )
                         "assumptions.")::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 638 );
      KernelInitCB = KIS.KernelInitCB;
    }
    if (KIS.KernelDeinitCB) {
      if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 644 )
                         "assumptions.")::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 644 );
      KernelDeinitCB = KIS.KernelDeinitCB;
    }
    SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
    ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
    ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
    return *this;
  }

  /// Alias for the clamp operation above.
  KernelInfoState operator&=(const KernelInfoState &KIS) {
    return (*this ^= KIS);
  }

  ///}
};
659 | |||||
660 | /// Used to map the values physically (in the IR) stored in an offload | ||||
661 | /// array, to a vector in memory. | ||||
662 | struct OffloadArray { | ||||
663 | /// Physical array (in the IR). | ||||
664 | AllocaInst *Array = nullptr; | ||||
665 | /// Mapped values. | ||||
666 | SmallVector<Value *, 8> StoredValues; | ||||
667 | /// Last stores made in the offload array. | ||||
668 | SmallVector<StoreInst *, 8> LastAccesses; | ||||
669 | |||||
670 | OffloadArray() = default; | ||||
671 | |||||
672 | /// Initializes the OffloadArray with the values stored in \p Array before | ||||
673 | /// instruction \p Before is reached. Returns false if the initialization | ||||
674 | /// fails. | ||||
675 | /// This MUST be used immediately after the construction of the object. | ||||
676 | bool initialize(AllocaInst &Array, Instruction &Before) { | ||||
677 | if (!Array.getAllocatedType()->isArrayTy()) | ||||
678 | return false; | ||||
679 | |||||
680 | if (!getValues(Array, Before)) | ||||
681 | return false; | ||||
682 | |||||
683 | this->Array = &Array; | ||||
684 | return true; | ||||
685 | } | ||||
686 | |||||
687 | static const unsigned DeviceIDArgNum = 1; | ||||
688 | static const unsigned BasePtrsArgNum = 3; | ||||
689 | static const unsigned PtrsArgNum = 4; | ||||
690 | static const unsigned SizesArgNum = 5; | ||||
691 | |||||
692 | private: | ||||
693 | /// Traverses the BasicBlock where \p Array is, collecting the stores made to | ||||
694 | /// \p Array, leaving StoredValues with the values stored before the | ||||
695 | /// instruction \p Before is reached. | ||||
696 | bool getValues(AllocaInst &Array, Instruction &Before) { | ||||
697 | // Initialize container. | ||||
698 | const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements(); | ||||
699 | StoredValues.assign(NumValues, nullptr); | ||||
700 | LastAccesses.assign(NumValues, nullptr); | ||||
701 | |||||
702 | // TODO: This assumes the instruction \p Before is in the same | ||||
703 | // BasicBlock as Array. Make it general, for any control flow graph. | ||||
704 | BasicBlock *BB = Array.getParent(); | ||||
705 | if (BB != Before.getParent()) | ||||
706 | return false; | ||||
707 | |||||
708 | const DataLayout &DL = Array.getModule()->getDataLayout(); | ||||
709 | const unsigned int PointerSize = DL.getPointerSize(); | ||||
710 | |||||
711 | for (Instruction &I : *BB) { | ||||
712 | if (&I == &Before) | ||||
713 | break; | ||||
714 | |||||
715 | if (!isa<StoreInst>(&I)) | ||||
716 | continue; | ||||
717 | |||||
718 | auto *S = cast<StoreInst>(&I); | ||||
719 | int64_t Offset = -1; | ||||
720 | auto *Dst = | ||||
721 | GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL); | ||||
722 | if (Dst == &Array) { | ||||
723 | int64_t Idx = Offset / PointerSize; | ||||
724 | StoredValues[Idx] = getUnderlyingObject(S->getValueOperand()); | ||||
725 | LastAccesses[Idx] = S; | ||||
726 | } | ||||
727 | } | ||||
728 | |||||
729 | return isFilled(); | ||||
730 | } | ||||
731 | |||||
732 | /// Returns true if all values in StoredValues and | ||||
733 | /// LastAccesses are not nullptrs. | ||||
734 | bool isFilled() { | ||||
735 | const unsigned NumValues = StoredValues.size(); | ||||
736 | for (unsigned I = 0; I < NumValues; ++I) { | ||||
737 | if (!StoredValues[I] || !LastAccesses[I]) | ||||
738 | return false; | ||||
739 | } | ||||
740 | |||||
741 | return true; | ||||
742 | } | ||||
743 | }; | ||||
744 | |||||
745 | struct OpenMPOpt { | ||||
746 | |||||
  /// Callback type to obtain the remark emitter for a given function.
  using OptimizationRemarkGetter =
      function_ref<OptimizationRemarkEmitter &(Function *)>;

  /// Construct the optimizer for the functions in \p SCC.
  /// NOTE(review): the initializer list dereferences *SCC.begin() to obtain
  /// the module, so \p SCC must be non-empty at construction time (run() only
  /// checks for emptiness afterwards).
  OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
            OptimizationRemarkGetter OREGetter,
            OMPInformationCache &OMPInfoCache, Attributor &A)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
        OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
755 | |||||
756 | /// Check if any remarks are enabled for openmp-opt | ||||
757 | bool remarksEnabled() { | ||||
758 | auto &Ctx = M.getContext(); | ||||
759 | return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE"openmp-opt"); | ||||
760 | } | ||||
761 | |||||
762 | /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. | ||||
763 | bool run(bool IsModulePass) { | ||||
764 | if (SCC.empty()) | ||||
765 | return false; | ||||
766 | |||||
767 | bool Changed = false; | ||||
768 | |||||
769 | LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"; } } while (false) | ||||
770 | << " functions in a slice with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"; } } while (false) | ||||
771 | << OMPInfoCache.ModuleSlice.size() << " functions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"; } } while (false); | ||||
772 | |||||
773 | if (IsModulePass) { | ||||
774 | Changed |= runAttributor(IsModulePass); | ||||
775 | |||||
776 | // Recollect uses, in case Attributor deleted any. | ||||
777 | OMPInfoCache.recollectUses(); | ||||
778 | |||||
779 | // TODO: This should be folded into buildCustomStateMachine. | ||||
780 | Changed |= rewriteDeviceCodeStateMachine(); | ||||
781 | |||||
782 | if (remarksEnabled()) | ||||
783 | analysisGlobalization(); | ||||
784 | } else { | ||||
785 | if (PrintICVValues) | ||||
786 | printICVs(); | ||||
787 | if (PrintOpenMPKernels) | ||||
788 | printKernels(); | ||||
789 | |||||
790 | Changed |= runAttributor(IsModulePass); | ||||
791 | |||||
792 | // Recollect uses, in case Attributor deleted any. | ||||
793 | OMPInfoCache.recollectUses(); | ||||
794 | |||||
795 | Changed |= deleteParallelRegions(); | ||||
796 | |||||
797 | if (HideMemoryTransferLatency) | ||||
798 | Changed |= hideMemTransfersLatency(); | ||||
799 | Changed |= deduplicateRuntimeCalls(); | ||||
800 | if (EnableParallelRegionMerging) { | ||||
801 | if (mergeParallelRegions()) { | ||||
802 | deduplicateRuntimeCalls(); | ||||
803 | Changed = true; | ||||
804 | } | ||||
805 | } | ||||
806 | } | ||||
807 | |||||
808 | return Changed; | ||||
809 | } | ||||
810 | |||||
811 | /// Print initial ICV values for testing. | ||||
812 | /// FIXME: This should be done from the Attributor once it is added. | ||||
813 | void printICVs() const { | ||||
814 | InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, | ||||
815 | ICV_proc_bind}; | ||||
816 | |||||
817 | for (Function *F : OMPInfoCache.ModuleSlice) { | ||||
818 | for (auto ICV : ICVs) { | ||||
819 | auto ICVInfo = OMPInfoCache.ICVs[ICV]; | ||||
820 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | ||||
821 | return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) | ||||
822 | << " Value: " | ||||
823 | << (ICVInfo.InitValue | ||||
824 | ? toString(ICVInfo.InitValue->getValue(), 10, true) | ||||
825 | : "IMPLEMENTATION_DEFINED"); | ||||
826 | }; | ||||
827 | |||||
828 | emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark); | ||||
829 | } | ||||
830 | } | ||||
831 | } | ||||
832 | |||||
833 | /// Print OpenMP GPU kernels for testing. | ||||
834 | void printKernels() const { | ||||
835 | for (Function *F : SCC) { | ||||
836 | if (!OMPInfoCache.Kernels.count(F)) | ||||
837 | continue; | ||||
838 | |||||
839 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | ||||
840 | return ORA << "OpenMP GPU kernel " | ||||
841 | << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; | ||||
842 | }; | ||||
843 | |||||
844 | emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark); | ||||
845 | } | ||||
846 | } | ||||
847 | |||||
848 | /// Return the call if \p U is a callee use in a regular call. If \p RFI is | ||||
849 | /// given it has to be the callee or a nullptr is returned. | ||||
850 | static CallInst *getCallIfRegularCall( | ||||
851 | Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { | ||||
852 | CallInst *CI = dyn_cast<CallInst>(U.getUser()); | ||||
853 | if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && | ||||
854 | (!RFI || | ||||
855 | (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) | ||||
856 | return CI; | ||||
857 | return nullptr; | ||||
858 | } | ||||
859 | |||||
860 | /// Return the call if \p V is a regular call. If \p RFI is given it has to be | ||||
861 | /// the callee or a nullptr is returned. | ||||
862 | static CallInst *getCallIfRegularCall( | ||||
863 | Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { | ||||
864 | CallInst *CI = dyn_cast<CallInst>(&V); | ||||
865 | if (CI && !CI->hasOperandBundles() && | ||||
866 | (!RFI || | ||||
867 | (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) | ||||
868 | return CI; | ||||
869 | return nullptr; | ||||
870 | } | ||||
871 | |||||
872 | private: | ||||
873 | /// Merge parallel regions when it is safe. | ||||
874 | bool mergeParallelRegions() { | ||||
875 | const unsigned CallbackCalleeOperand = 2; | ||||
876 | const unsigned CallbackFirstArgOperand = 3; | ||||
877 | using InsertPointTy = OpenMPIRBuilder::InsertPointTy; | ||||
878 | |||||
879 | // Check if there are any __kmpc_fork_call calls to merge. | ||||
880 | OMPInformationCache::RuntimeFunctionInfo &RFI = | ||||
881 | OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; | ||||
882 | |||||
883 | if (!RFI.Declaration) | ||||
884 | return false; | ||||
885 | |||||
886 | // Unmergable calls that prevent merging a parallel region. | ||||
887 | OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = { | ||||
888 | OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind], | ||||
889 | OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads], | ||||
890 | }; | ||||
891 | |||||
892 | bool Changed = false; | ||||
893 | LoopInfo *LI = nullptr; | ||||
894 | DominatorTree *DT = nullptr; | ||||
895 | |||||
896 | SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; | ||||
897 | |||||
898 | BasicBlock *StartBB = nullptr, *EndBB = nullptr; | ||||
899 | auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | ||||
900 | BasicBlock &ContinuationIP) { | ||||
901 | BasicBlock *CGStartBB = CodeGenIP.getBlock(); | ||||
902 | BasicBlock *CGEndBB = | ||||
903 | SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); | ||||
904 | assert(StartBB != nullptr && "StartBB should not be null")(static_cast <bool> (StartBB != nullptr && "StartBB should not be null" ) ? void (0) : __assert_fail ("StartBB != nullptr && \"StartBB should not be null\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 904, __extension__ __PRETTY_FUNCTION__)); | ||||
905 | CGStartBB->getTerminator()->setSuccessor(0, StartBB); | ||||
906 | assert(EndBB != nullptr && "EndBB should not be null")(static_cast <bool> (EndBB != nullptr && "EndBB should not be null" ) ? void (0) : __assert_fail ("EndBB != nullptr && \"EndBB should not be null\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 906, __extension__ __PRETTY_FUNCTION__)); | ||||
907 | EndBB->getTerminator()->setSuccessor(0, CGEndBB); | ||||
908 | }; | ||||
909 | |||||
910 | auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, | ||||
911 | Value &Inner, Value *&ReplacementValue) -> InsertPointTy { | ||||
912 | ReplacementValue = &Inner; | ||||
913 | return CodeGenIP; | ||||
914 | }; | ||||
915 | |||||
916 | auto FiniCB = [&](InsertPointTy CodeGenIP) {}; | ||||
917 | |||||
918 | /// Create a sequential execution region within a merged parallel region, | ||||
919 | /// encapsulated in a master construct with a barrier for synchronization. | ||||
920 | auto CreateSequentialRegion = [&](Function *OuterFn, | ||||
921 | BasicBlock *OuterPredBB, | ||||
922 | Instruction *SeqStartI, | ||||
923 | Instruction *SeqEndI) { | ||||
924 | // Isolate the instructions of the sequential region to a separate | ||||
925 | // block. | ||||
926 | BasicBlock *ParentBB = SeqStartI->getParent(); | ||||
927 | BasicBlock *SeqEndBB = | ||||
928 | SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); | ||||
929 | BasicBlock *SeqAfterBB = | ||||
930 | SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); | ||||
931 | BasicBlock *SeqStartBB = | ||||
932 | SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); | ||||
933 | |||||
934 | assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&(static_cast <bool> (ParentBB->getUniqueSuccessor() == SeqStartBB && "Expected a different CFG") ? void (0) : __assert_fail ("ParentBB->getUniqueSuccessor() == SeqStartBB && \"Expected a different CFG\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 935, __extension__ __PRETTY_FUNCTION__)) | ||||
935 | "Expected a different CFG")(static_cast <bool> (ParentBB->getUniqueSuccessor() == SeqStartBB && "Expected a different CFG") ? void (0) : __assert_fail ("ParentBB->getUniqueSuccessor() == SeqStartBB && \"Expected a different CFG\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 935, __extension__ __PRETTY_FUNCTION__)); | ||||
936 | const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); | ||||
937 | ParentBB->getTerminator()->eraseFromParent(); | ||||
938 | |||||
939 | auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | ||||
940 | BasicBlock &ContinuationIP) { | ||||
941 | BasicBlock *CGStartBB = CodeGenIP.getBlock(); | ||||
942 | BasicBlock *CGEndBB = | ||||
943 | SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); | ||||
944 | assert(SeqStartBB != nullptr && "SeqStartBB should not be null")(static_cast <bool> (SeqStartBB != nullptr && "SeqStartBB should not be null" ) ? void (0) : __assert_fail ("SeqStartBB != nullptr && \"SeqStartBB should not be null\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 944, __extension__ __PRETTY_FUNCTION__)); | ||||
945 | CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); | ||||
946 | assert(SeqEndBB != nullptr && "SeqEndBB should not be null")(static_cast <bool> (SeqEndBB != nullptr && "SeqEndBB should not be null" ) ? void (0) : __assert_fail ("SeqEndBB != nullptr && \"SeqEndBB should not be null\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 946, __extension__ __PRETTY_FUNCTION__)); | ||||
947 | SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); | ||||
948 | }; | ||||
949 | auto FiniCB = [&](InsertPointTy CodeGenIP) {}; | ||||
950 | |||||
951 | // Find outputs from the sequential region to outside users and | ||||
952 | // broadcast their values to them. | ||||
953 | for (Instruction &I : *SeqStartBB) { | ||||
954 | SmallPtrSet<Instruction *, 4> OutsideUsers; | ||||
955 | for (User *Usr : I.users()) { | ||||
956 | Instruction &UsrI = *cast<Instruction>(Usr); | ||||
957 | // Ignore outputs to LT intrinsics, code extraction for the merged | ||||
958 | // parallel region will fix them. | ||||
959 | if (UsrI.isLifetimeStartOrEnd()) | ||||
960 | continue; | ||||
961 | |||||
962 | if (UsrI.getParent() != SeqStartBB) | ||||
963 | OutsideUsers.insert(&UsrI); | ||||
964 | } | ||||
965 | |||||
966 | if (OutsideUsers.empty()) | ||||
967 | continue; | ||||
968 | |||||
969 | // Emit an alloca in the outer region to store the broadcasted | ||||
970 | // value. | ||||
971 | const DataLayout &DL = M.getDataLayout(); | ||||
972 | AllocaInst *AllocaI = new AllocaInst( | ||||
973 | I.getType(), DL.getAllocaAddrSpace(), nullptr, | ||||
974 | I.getName() + ".seq.output.alloc", &OuterFn->front().front()); | ||||
975 | |||||
976 | // Emit a store instruction in the sequential BB to update the | ||||
977 | // value. | ||||
978 | new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()); | ||||
979 | |||||
980 | // Emit a load instruction and replace the use of the output value | ||||
981 | // with it. | ||||
982 | for (Instruction *UsrI : OutsideUsers) { | ||||
983 | LoadInst *LoadI = new LoadInst( | ||||
984 | I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI); | ||||
985 | UsrI->replaceUsesOfWith(&I, LoadI); | ||||
986 | } | ||||
987 | } | ||||
988 | |||||
989 | OpenMPIRBuilder::LocationDescription Loc( | ||||
990 | InsertPointTy(ParentBB, ParentBB->end()), DL); | ||||
991 | InsertPointTy SeqAfterIP = | ||||
992 | OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); | ||||
993 | |||||
994 | OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); | ||||
995 | |||||
996 | BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); | ||||
997 | |||||
998 | LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "After sequential inlining " << *OuterFn << "\n"; } } while (false) | ||||
999 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "After sequential inlining " << *OuterFn << "\n"; } } while (false); | ||||
1000 | }; | ||||
1001 | |||||
1002 | // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all | ||||
1003 | // contained in BB and only separated by instructions that can be | ||||
1004 | // redundantly executed in parallel. The block BB is split before the first | ||||
1005 | // call (in MergableCIs) and after the last so the entire region we merge | ||||
1006 | // into a single parallel region is contained in a single basic block | ||||
1007 | // without any other instructions. We use the OpenMPIRBuilder to outline | ||||
1008 | // that block and call the resulting function via __kmpc_fork_call. | ||||
1009 | auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) { | ||||
1010 | // TODO: Change the interface to allow single CIs expanded, e.g, to | ||||
1011 | // include an outer loop. | ||||
1012 | assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs")(static_cast <bool> (MergableCIs.size() > 1 && "Assumed multiple mergable CIs") ? void (0) : __assert_fail ( "MergableCIs.size() > 1 && \"Assumed multiple mergable CIs\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 1012, __extension__ __PRETTY_FUNCTION__)); | ||||
1013 | |||||
1014 | auto Remark = [&](OptimizationRemark OR) { | ||||
1015 | OR << "Parallel region merged with parallel region" | ||||
1016 | << (MergableCIs.size() > 2 ? "s" : "") << " at "; | ||||
1017 | for (auto *CI : llvm::drop_begin(MergableCIs)) { | ||||
1018 | OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); | ||||
1019 | if (CI != MergableCIs.back()) | ||||
1020 | OR << ", "; | ||||
1021 | } | ||||
1022 | return OR << "."; | ||||
1023 | }; | ||||
1024 | |||||
1025 | emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark); | ||||
1026 | |||||
1027 | Function *OriginalFn = BB->getParent(); | ||||
1028 | LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Merge " << MergableCIs.size() << " parallel regions in " << OriginalFn->getName() << "\n"; } } while (false) | ||||
1029 | << " parallel regions in " << OriginalFn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Merge " << MergableCIs.size() << " parallel regions in " << OriginalFn->getName() << "\n"; } } while (false) | ||||
1030 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Merge " << MergableCIs.size() << " parallel regions in " << OriginalFn->getName() << "\n"; } } while (false); | ||||
1031 | |||||
1032 | // Isolate the calls to merge in a separate block. | ||||
1033 | EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI); | ||||
1034 | BasicBlock *AfterBB = | ||||
1035 | SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI); | ||||
1036 | StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr, | ||||
1037 | "omp.par.merged"); | ||||
1038 | |||||
1039 | assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG")(static_cast <bool> (BB->getUniqueSuccessor() == StartBB && "Expected a different CFG") ? void (0) : __assert_fail ("BB->getUniqueSuccessor() == StartBB && \"Expected a different CFG\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 1039, __extension__ __PRETTY_FUNCTION__)); | ||||
1040 | const DebugLoc DL = BB->getTerminator()->getDebugLoc(); | ||||
1041 | BB->getTerminator()->eraseFromParent(); | ||||
1042 | |||||
1043 | // Create sequential regions for sequential instructions that are | ||||
1044 | // in-between mergable parallel regions. | ||||
1045 | for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1; | ||||
1046 | It != End; ++It) { | ||||
1047 | Instruction *ForkCI = *It; | ||||
1048 | Instruction *NextForkCI = *(It + 1); | ||||
1049 | |||||
1050 | // Continue if there are not in-between instructions. | ||||
1051 | if (ForkCI->getNextNode() == NextForkCI) | ||||
1052 | continue; | ||||
1053 | |||||
1054 | CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(), | ||||
1055 | NextForkCI->getPrevNode()); | ||||
1056 | } | ||||
1057 | |||||
1058 | OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), | ||||
1059 | DL); | ||||
1060 | IRBuilder<>::InsertPoint AllocaIP( | ||||
1061 | &OriginalFn->getEntryBlock(), | ||||
1062 | OriginalFn->getEntryBlock().getFirstInsertionPt()); | ||||
1063 | // Create the merged parallel region with default proc binding, to | ||||
1064 | // avoid overriding binding settings, and without explicit cancellation. | ||||
1065 | InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( | ||||
1066 | Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, | ||||
1067 | OMP_PROC_BIND_default, /* IsCancellable */ false); | ||||
1068 | BranchInst::Create(AfterBB, AfterIP.getBlock()); | ||||
1069 | |||||
1070 | // Perform the actual outlining. | ||||
1071 | OMPInfoCache.OMPBuilder.finalize(OriginalFn, | ||||
1072 | /* AllowExtractorSinking */ true); | ||||
1073 | |||||
1074 | Function *OutlinedFn = MergableCIs.front()->getCaller(); | ||||
1075 | |||||
1076 | // Replace the __kmpc_fork_call calls with direct calls to the outlined | ||||
1077 | // callbacks. | ||||
1078 | SmallVector<Value *, 8> Args; | ||||
1079 | for (auto *CI : MergableCIs) { | ||||
1080 | Value *Callee = | ||||
1081 | CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); | ||||
1082 | FunctionType *FT = | ||||
1083 | cast<FunctionType>(Callee->getType()->getPointerElementType()); | ||||
1084 | Args.clear(); | ||||
1085 | Args.push_back(OutlinedFn->getArg(0)); | ||||
1086 | Args.push_back(OutlinedFn->getArg(1)); | ||||
1087 | for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; | ||||
1088 | ++U) | ||||
1089 | Args.push_back(CI->getArgOperand(U)); | ||||
1090 | |||||
1091 | CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI); | ||||
1092 | if (CI->getDebugLoc()) | ||||
1093 | NewCI->setDebugLoc(CI->getDebugLoc()); | ||||
1094 | |||||
1095 | // Forward parameter attributes from the callback to the callee. | ||||
1096 | for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; | ||||
1097 | ++U) | ||||
1098 | for (const Attribute &A : CI->getAttributes().getParamAttrs(U)) | ||||
1099 | NewCI->addParamAttr( | ||||
1100 | U - (CallbackFirstArgOperand - CallbackCalleeOperand), A); | ||||
1101 | |||||
1102 | // Emit an explicit barrier to replace the implicit fork-join barrier. | ||||
1103 | if (CI != MergableCIs.back()) { | ||||
1104 | // TODO: Remove barrier if the merged parallel region includes the | ||||
1105 | // 'nowait' clause. | ||||
1106 | OMPInfoCache.OMPBuilder.createBarrier( | ||||
1107 | InsertPointTy(NewCI->getParent(), | ||||
1108 | NewCI->getNextNode()->getIterator()), | ||||
1109 | OMPD_parallel); | ||||
1110 | } | ||||
1111 | |||||
1112 | CI->eraseFromParent(); | ||||
1113 | } | ||||
1114 | |||||
1115 | assert(OutlinedFn != OriginalFn && "Outlining failed")(static_cast <bool> (OutlinedFn != OriginalFn && "Outlining failed") ? void (0) : __assert_fail ("OutlinedFn != OriginalFn && \"Outlining failed\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 1115, __extension__ __PRETTY_FUNCTION__)); | ||||
1116 | CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn); | ||||
1117 | CGUpdater.reanalyzeFunction(*OriginalFn); | ||||
1118 | |||||
1119 | NumOpenMPParallelRegionsMerged += MergableCIs.size(); | ||||
1120 | |||||
1121 | return true; | ||||
1122 | }; | ||||
1123 | |||||
1124 | // Helper function that identifes sequences of | ||||
1125 | // __kmpc_fork_call uses in a basic block. | ||||
1126 | auto DetectPRsCB = [&](Use &U, Function &F) { | ||||
1127 | CallInst *CI = getCallIfRegularCall(U, &RFI); | ||||
1128 | BB2PRMap[CI->getParent()].insert(CI); | ||||
1129 | |||||
1130 | return false; | ||||
1131 | }; | ||||
1132 | |||||
1133 | BB2PRMap.clear(); | ||||
1134 | RFI.foreachUse(SCC, DetectPRsCB); | ||||
1135 | SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector; | ||||
1136 | // Find mergable parallel regions within a basic block that are | ||||
1137 | // safe to merge, that is any in-between instructions can safely | ||||
1138 | // execute in parallel after merging. | ||||
1139 | // TODO: support merging across basic-blocks. | ||||
1140 | for (auto &It : BB2PRMap) { | ||||
1141 | auto &CIs = It.getSecond(); | ||||
1142 | if (CIs.size() < 2) | ||||
1143 | continue; | ||||
1144 | |||||
1145 | BasicBlock *BB = It.getFirst(); | ||||
1146 | SmallVector<CallInst *, 4> MergableCIs; | ||||
1147 | |||||
1148 | /// Returns true if the instruction is mergable, false otherwise. | ||||
1149 | /// A terminator instruction is unmergable by definition since merging | ||||
1150 | /// works within a BB. Instructions before the mergable region are | ||||
1151 | /// mergable if they are not calls to OpenMP runtime functions that may | ||||
1152 | /// set different execution parameters for subsequent parallel regions. | ||||
1153 | /// Instructions in-between parallel regions are mergable if they are not | ||||
1154 | /// calls to any non-intrinsic function since that may call a non-mergable | ||||
1155 | /// OpenMP runtime function. | ||||
1156 | auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { | ||||
1157 | // We do not merge across BBs, hence return false (unmergable) if the | ||||
1158 | // instruction is a terminator. | ||||
1159 | if (I.isTerminator()) | ||||
1160 | return false; | ||||
1161 | |||||
1162 | if (!isa<CallInst>(&I)) | ||||
1163 | return true; | ||||
1164 | |||||
1165 | CallInst *CI = cast<CallInst>(&I); | ||||
1166 | if (IsBeforeMergableRegion) { | ||||
1167 | Function *CalledFunction = CI->getCalledFunction(); | ||||
1168 | if (!CalledFunction) | ||||
1169 | return false; | ||||
1170 | // Return false (unmergable) if the call before the parallel | ||||
1171 | // region calls an explicit affinity (proc_bind) or number of | ||||
1172 | // threads (num_threads) compiler-generated function. Those settings | ||||
1173 | // may be incompatible with following parallel regions. | ||||
1174 | // TODO: ICV tracking to detect compatibility. | ||||
1175 | for (const auto &RFI : UnmergableCallsInfo) { | ||||
1176 | if (CalledFunction == RFI.Declaration) | ||||
1177 | return false; | ||||
1178 | } | ||||
1179 | } else { | ||||
1180 | // Return false (unmergable) if there is a call instruction | ||||
1181 | // in-between parallel regions when it is not an intrinsic. It | ||||
1182 | // may call an unmergable OpenMP runtime function in its callpath. | ||||
1183 | // TODO: Keep track of possible OpenMP calls in the callpath. | ||||
1184 | if (!isa<IntrinsicInst>(CI)) | ||||
1185 | return false; | ||||
1186 | } | ||||
1187 | |||||
1188 | return true; | ||||
1189 | }; | ||||
1190 | // Find maximal number of parallel region CIs that are safe to merge. | ||||
1191 | for (auto It = BB->begin(), End = BB->end(); It != End;) { | ||||
1192 | Instruction &I = *It; | ||||
1193 | ++It; | ||||
1194 | |||||
1195 | if (CIs.count(&I)) { | ||||
1196 | MergableCIs.push_back(cast<CallInst>(&I)); | ||||
1197 | continue; | ||||
1198 | } | ||||
1199 | |||||
1200 | // Continue expanding if the instruction is mergable. | ||||
1201 | if (IsMergable(I, MergableCIs.empty())) | ||||
1202 | continue; | ||||
1203 | |||||
1204 | // Forward the instruction iterator to skip the next parallel region | ||||
1205 | // since there is an unmergable instruction which can affect it. | ||||
1206 | for (; It != End; ++It) { | ||||
1207 | Instruction &SkipI = *It; | ||||
1208 | if (CIs.count(&SkipI)) { | ||||
1209 | LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Skip parallel region " << SkipI << " due to " << I << "\n"; } } while (false) | ||||
1210 | << " due to " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Skip parallel region " << SkipI << " due to " << I << "\n"; } } while (false); | ||||
1211 | ++It; | ||||
1212 | break; | ||||
1213 | } | ||||
1214 | } | ||||
1215 | |||||
1216 | // Store mergable regions found. | ||||
1217 | if (MergableCIs.size() > 1) { | ||||
1218 | MergableCIsVector.push_back(MergableCIs); | ||||
1219 | LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false) | ||||
1220 | << " parallel regions in block " << BB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false) | ||||
1221 | << " of function " << BB->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false) | ||||
1222 | << "\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false); | ||||
1223 | } | ||||
1224 | |||||
1225 | MergableCIs.clear(); | ||||
1226 | } | ||||
1227 | |||||
1228 | if (!MergableCIsVector.empty()) { | ||||
1229 | Changed = true; | ||||
1230 | |||||
1231 | for (auto &MergableCIs : MergableCIsVector) | ||||
1232 | Merge(MergableCIs, BB); | ||||
1233 | MergableCIsVector.clear(); | ||||
1234 | } | ||||
1235 | } | ||||
1236 | |||||
1237 | if (Changed) { | ||||
1238 | /// Re-collect use for fork calls, emitted barrier calls, and | ||||
1239 | /// any emitted master/end_master calls. | ||||
1240 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); | ||||
1241 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); | ||||
1242 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); | ||||
1243 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); | ||||
1244 | } | ||||
1245 | |||||
1246 | return Changed; | ||||
1247 | } | ||||
1248 | |||||
1249 | /// Try to delete parallel regions if possible. | ||||
1250 | bool deleteParallelRegions() { | ||||
1251 | const unsigned CallbackCalleeOperand = 2; | ||||
1252 | |||||
1253 | OMPInformationCache::RuntimeFunctionInfo &RFI = | ||||
1254 | OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; | ||||
1255 | |||||
1256 | if (!RFI.Declaration) | ||||
1257 | return false; | ||||
1258 | |||||
1259 | bool Changed = false; | ||||
1260 | auto DeleteCallCB = [&](Use &U, Function &) { | ||||
1261 | CallInst *CI = getCallIfRegularCall(U); | ||||
1262 | if (!CI) | ||||
1263 | return false; | ||||
1264 | auto *Fn = dyn_cast<Function>( | ||||
1265 | CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); | ||||
1266 | if (!Fn) | ||||
1267 | return false; | ||||
1268 | if (!Fn->onlyReadsMemory()) | ||||
1269 | return false; | ||||
1270 | if (!Fn->hasFnAttribute(Attribute::WillReturn)) | ||||
1271 | return false; | ||||
1272 | |||||
1273 | LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Delete read-only parallel region in " << CI->getCaller()->getName() << "\n"; } } while (false) | ||||
1274 | << CI->getCaller()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Delete read-only parallel region in " << CI->getCaller()->getName() << "\n"; } } while (false); | ||||
1275 | |||||
1276 | auto Remark = [&](OptimizationRemark OR) { | ||||
1277 | return OR << "Removing parallel region with no side-effects."; | ||||
1278 | }; | ||||
1279 | emitRemark<OptimizationRemark>(CI, "OMP160", Remark); | ||||
1280 | |||||
1281 | CGUpdater.removeCallSite(*CI); | ||||
1282 | CI->eraseFromParent(); | ||||
1283 | Changed = true; | ||||
1284 | ++NumOpenMPParallelRegionsDeleted; | ||||
1285 | return true; | ||||
1286 | }; | ||||
1287 | |||||
1288 | RFI.foreachUse(SCC, DeleteCallCB); | ||||
1289 | |||||
1290 | return Changed; | ||||
1291 | } | ||||
1292 | |||||
1293 | /// Try to eliminate runtime calls by reusing existing ones. | ||||
1294 | bool deduplicateRuntimeCalls() { | ||||
1295 | bool Changed = false; | ||||
1296 | |||||
1297 | RuntimeFunction DeduplicableRuntimeCallIDs[] = { | ||||
1298 | OMPRTL_omp_get_num_threads, | ||||
1299 | OMPRTL_omp_in_parallel, | ||||
1300 | OMPRTL_omp_get_cancellation, | ||||
1301 | OMPRTL_omp_get_thread_limit, | ||||
1302 | OMPRTL_omp_get_supported_active_levels, | ||||
1303 | OMPRTL_omp_get_level, | ||||
1304 | OMPRTL_omp_get_ancestor_thread_num, | ||||
1305 | OMPRTL_omp_get_team_size, | ||||
1306 | OMPRTL_omp_get_active_level, | ||||
1307 | OMPRTL_omp_in_final, | ||||
1308 | OMPRTL_omp_get_proc_bind, | ||||
1309 | OMPRTL_omp_get_num_places, | ||||
1310 | OMPRTL_omp_get_num_procs, | ||||
1311 | OMPRTL_omp_get_place_num, | ||||
1312 | OMPRTL_omp_get_partition_num_places, | ||||
1313 | OMPRTL_omp_get_partition_place_nums}; | ||||
1314 | |||||
1315 | // Global-tid is handled separately. | ||||
1316 | SmallSetVector<Value *, 16> GTIdArgs; | ||||
1317 | collectGlobalThreadIdArguments(GTIdArgs); | ||||
1318 | LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << GTIdArgs.size() << " global thread ID arguments\n"; } } while (false) | ||||
1319 | << " global thread ID arguments\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << GTIdArgs.size() << " global thread ID arguments\n"; } } while (false); | ||||
1320 | |||||
1321 | for (Function *F : SCC) { | ||||
1322 | for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) | ||||
1323 | Changed |= deduplicateRuntimeCalls( | ||||
1324 | *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); | ||||
1325 | |||||
1326 | // __kmpc_global_thread_num is special as we can replace it with an | ||||
1327 | // argument in enough cases to make it worth trying. | ||||
1328 | Value *GTIdArg = nullptr; | ||||
1329 | for (Argument &Arg : F->args()) | ||||
1330 | if (GTIdArgs.count(&Arg)) { | ||||
1331 | GTIdArg = &Arg; | ||||
1332 | break; | ||||
1333 | } | ||||
1334 | Changed |= deduplicateRuntimeCalls( | ||||
1335 | *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); | ||||
1336 | } | ||||
1337 | |||||
1338 | return Changed; | ||||
1339 | } | ||||
1340 | |||||
1341 | /// Tries to hide the latency of runtime calls that involve host to | ||||
1342 | /// device memory transfers by splitting them into their "issue" and "wait" | ||||
1343 | /// versions. The "issue" is moved upwards as much as possible. The "wait" is | ||||
1344 | /// moved downards as much as possible. The "issue" issues the memory transfer | ||||
1345 | /// asynchronously, returning a handle. The "wait" waits in the returned | ||||
1346 | /// handle for the memory transfer to finish. | ||||
1347 | bool hideMemTransfersLatency() { | ||||
1348 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper]; | ||||
1349 | bool Changed = false; | ||||
1350 | auto SplitMemTransfers = [&](Use &U, Function &Decl) { | ||||
1351 | auto *RTCall = getCallIfRegularCall(U, &RFI); | ||||
1352 | if (!RTCall) | ||||
1353 | return false; | ||||
1354 | |||||
1355 | OffloadArray OffloadArrays[3]; | ||||
1356 | if (!getValuesInOffloadArrays(*RTCall, OffloadArrays)) | ||||
1357 | return false; | ||||
1358 | |||||
1359 | LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dumpValuesInOffloadArrays(OffloadArrays); } } while (false); | ||||
1360 | |||||
1361 | // TODO: Check if can be moved upwards. | ||||
1362 | bool WasSplit = false; | ||||
1363 | Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); | ||||
1364 | if (WaitMovementPoint) | ||||
1365 | WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); | ||||
1366 | |||||
1367 | Changed |= WasSplit; | ||||
1368 | return WasSplit; | ||||
1369 | }; | ||||
1370 | RFI.foreachUse(SCC, SplitMemTransfers); | ||||
1371 | |||||
1372 | return Changed; | ||||
1373 | } | ||||
1374 | |||||
1375 | void analysisGlobalization() { | ||||
1376 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | ||||
1377 | |||||
1378 | auto CheckGlobalization = [&](Use &U, Function &Decl) { | ||||
1379 | if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { | ||||
1380 | auto Remark = [&](OptimizationRemarkMissed ORM) { | ||||
1381 | return ORM | ||||
1382 | << "Found thread data sharing on the GPU. " | ||||
1383 | << "Expect degraded performance due to data globalization."; | ||||
1384 | }; | ||||
1385 | emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark); | ||||
1386 | } | ||||
1387 | |||||
1388 | return false; | ||||
1389 | }; | ||||
1390 | |||||
1391 | RFI.foreachUse(SCC, CheckGlobalization); | ||||
1392 | } | ||||
1393 | |||||
  /// Maps the values stored in the offload arrays passed as arguments to
  /// \p RuntimeCall into the offload arrays in \p OAs.
  /// Returns false if any of the three arrays cannot be traced back to a
  /// local alloca that OffloadArray::initialize can analyze.
  bool getValuesInOffloadArrays(CallInst &RuntimeCall,
                                MutableArrayRef<OffloadArray> OAs) {
    assert(OAs.size() == 3 && "Need space for three offload arrays!");

    // A runtime call that involves memory offloading looks something like:
    // call void @__tgt_target_data_begin_mapper(arg0, arg1,
    //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
    // ...)
    // So, the idea is to access the allocas that allocate space for these
    // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
    // Therefore:
    // i8** %offload_baseptrs.
    Value *BasePtrsArg =
        RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
    // i8** %offload_ptrs.
    Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
    // i8** %offload_sizes.
    Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);

    // Get values stored in **offload_baseptrs.
    auto *V = getUnderlyingObject(BasePtrsArg);
    if (!isa<AllocaInst>(V))
      return false;
    auto *BasePtrsArray = cast<AllocaInst>(V);
    if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
      return false;

    // Get values stored in **offload_ptrs.
    V = getUnderlyingObject(PtrsArg);
    if (!isa<AllocaInst>(V))
      return false;
    auto *PtrsArray = cast<AllocaInst>(V);
    if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
      return false;

    // Get values stored in **offload_sizes.
    V = getUnderlyingObject(SizesArg);
    // If it's a [constant] global array don't analyze it: a constant global
    // is acceptable as-is (sizes are compile-time known), anything else
    // global is rejected.
    if (isa<GlobalValue>(V))
      return isa<Constant>(V);
    if (!isa<AllocaInst>(V))
      return false;

    auto *SizesArray = cast<AllocaInst>(V);
    if (!OAs[2].initialize(*SizesArray, RuntimeCall))
      return false;

    return true;
  }
1445 | |||||
1446 | /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG. | ||||
1447 | /// For now this is a way to test that the function getValuesInOffloadArrays | ||||
1448 | /// is working properly. | ||||
1449 | /// TODO: Move this to a unittest when unittests are available for OpenMPOpt. | ||||
1450 | void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) { | ||||
1451 | assert(OAs.size() == 3 && "There are three offload arrays to debug!")(static_cast <bool> (OAs.size() == 3 && "There are three offload arrays to debug!" ) ? void (0) : __assert_fail ("OAs.size() == 3 && \"There are three offload arrays to debug!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 1451, __extension__ __PRETTY_FUNCTION__)); | ||||
1452 | |||||
1453 | LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << " Successfully got offload values:\n" ; } } while (false); | ||||
1454 | std::string ValuesStr; | ||||
1455 | raw_string_ostream Printer(ValuesStr); | ||||
1456 | std::string Separator = " --- "; | ||||
1457 | |||||
1458 | for (auto *BP : OAs[0].StoredValues) { | ||||
1459 | BP->print(Printer); | ||||
1460 | Printer << Separator; | ||||
1461 | } | ||||
1462 | LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n"; } } while (false); | ||||
1463 | ValuesStr.clear(); | ||||
1464 | |||||
1465 | for (auto *P : OAs[1].StoredValues) { | ||||
1466 | P->print(Printer); | ||||
1467 | Printer << Separator; | ||||
1468 | } | ||||
1469 | LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n"; } } while (false); | ||||
1470 | ValuesStr.clear(); | ||||
1471 | |||||
1472 | for (auto *S : OAs[2].StoredValues) { | ||||
1473 | S->print(Printer); | ||||
1474 | Printer << Separator; | ||||
1475 | } | ||||
1476 | LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n"; } } while (false); | ||||
1477 | } | ||||
1478 | |||||
1479 | /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be | ||||
1480 | /// moved. Returns nullptr if the movement is not possible, or not worth it. | ||||
1481 | Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { | ||||
1482 | // FIXME: This traverses only the BasicBlock where RuntimeCall is. | ||||
1483 | // Make it traverse the CFG. | ||||
1484 | |||||
1485 | Instruction *CurrentI = &RuntimeCall; | ||||
1486 | bool IsWorthIt = false; | ||||
1487 | while ((CurrentI = CurrentI->getNextNode())) { | ||||
1488 | |||||
1489 | // TODO: Once we detect the regions to be offloaded we should use the | ||||
1490 | // alias analysis manager to check if CurrentI may modify one of | ||||
1491 | // the offloaded regions. | ||||
1492 | if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { | ||||
1493 | if (IsWorthIt) | ||||
1494 | return CurrentI; | ||||
1495 | |||||
1496 | return nullptr; | ||||
1497 | } | ||||
1498 | |||||
1499 | // FIXME: For now if we move it over anything without side effect | ||||
1500 | // is worth it. | ||||
1501 | IsWorthIt = true; | ||||
1502 | } | ||||
1503 | |||||
1504 | // Return end of BasicBlock. | ||||
1505 | return RuntimeCall.getParent()->getTerminator(); | ||||
1506 | } | ||||
1507 | |||||
1508 | /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. | ||||
1509 | bool splitTargetDataBeginRTC(CallInst &RuntimeCall, | ||||
1510 | Instruction &WaitMovementPoint) { | ||||
1511 | // Create stack allocated handle (__tgt_async_info) at the beginning of the | ||||
1512 | // function. Used for storing information of the async transfer, allowing to | ||||
1513 | // wait on it later. | ||||
1514 | auto &IRBuilder = OMPInfoCache.OMPBuilder; | ||||
1515 | auto *F = RuntimeCall.getCaller(); | ||||
1516 | Instruction *FirstInst = &(F->getEntryBlock().front()); | ||||
1517 | AllocaInst *Handle = new AllocaInst( | ||||
1518 | IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst); | ||||
1519 | |||||
1520 | // Add "issue" runtime call declaration: | ||||
1521 | // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, | ||||
1522 | // i8**, i8**, i64*, i64*) | ||||
1523 | FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( | ||||
1524 | M, OMPRTL___tgt_target_data_begin_mapper_issue); | ||||
1525 | |||||
1526 | // Change RuntimeCall call site for its asynchronous version. | ||||
1527 | SmallVector<Value *, 16> Args; | ||||
1528 | for (auto &Arg : RuntimeCall.args()) | ||||
1529 | Args.push_back(Arg.get()); | ||||
1530 | Args.push_back(Handle); | ||||
1531 | |||||
1532 | CallInst *IssueCallsite = | ||||
1533 | CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall); | ||||
1534 | RuntimeCall.eraseFromParent(); | ||||
1535 | |||||
1536 | // Add "wait" runtime call declaration: | ||||
1537 | // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) | ||||
1538 | FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( | ||||
1539 | M, OMPRTL___tgt_target_data_begin_mapper_wait); | ||||
1540 | |||||
1541 | Value *WaitParams[2] = { | ||||
1542 | IssueCallsite->getArgOperand( | ||||
1543 | OffloadArray::DeviceIDArgNum), // device_id. | ||||
1544 | Handle // handle to wait on. | ||||
1545 | }; | ||||
1546 | CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint); | ||||
1547 | |||||
1548 | return true; | ||||
1549 | } | ||||
1550 | |||||
1551 | static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, | ||||
1552 | bool GlobalOnly, bool &SingleChoice) { | ||||
1553 | if (CurrentIdent == NextIdent) | ||||
1554 | return CurrentIdent; | ||||
1555 | |||||
1556 | // TODO: Figure out how to actually combine multiple debug locations. For | ||||
1557 | // now we just keep an existing one if there is a single choice. | ||||
1558 | if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { | ||||
1559 | SingleChoice = !CurrentIdent; | ||||
1560 | return NextIdent; | ||||
1561 | } | ||||
1562 | return nullptr; | ||||
1563 | } | ||||
1564 | |||||
1565 | /// Return an `struct ident_t*` value that represents the ones used in the | ||||
1566 | /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not | ||||
1567 | /// return a local `struct ident_t*`. For now, if we cannot find a suitable | ||||
1568 | /// return value we create one from scratch. We also do not yet combine | ||||
1569 | /// information, e.g., the source locations, see combinedIdentStruct. | ||||
1570 | Value * | ||||
1571 | getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, | ||||
1572 | Function &F, bool GlobalOnly) { | ||||
1573 | bool SingleChoice = true; | ||||
1574 | Value *Ident = nullptr; | ||||
1575 | auto CombineIdentStruct = [&](Use &U, Function &Caller) { | ||||
1576 | CallInst *CI = getCallIfRegularCall(U, &RFI); | ||||
1577 | if (!CI || &F != &Caller) | ||||
1578 | return false; | ||||
1579 | Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), | ||||
1580 | /* GlobalOnly */ true, SingleChoice); | ||||
1581 | return false; | ||||
1582 | }; | ||||
1583 | RFI.foreachUse(SCC, CombineIdentStruct); | ||||
1584 | |||||
1585 | if (!Ident || !SingleChoice) { | ||||
1586 | // The IRBuilder uses the insertion block to get to the module, this is | ||||
1587 | // unfortunate but we work around it for now. | ||||
1588 | if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) | ||||
1589 | OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( | ||||
1590 | &F.getEntryBlock(), F.getEntryBlock().begin())); | ||||
1591 | // Create a fallback location if non was found. | ||||
1592 | // TODO: Use the debug locations of the calls instead. | ||||
1593 | uint32_t SrcLocStrSize; | ||||
1594 | Constant *Loc = | ||||
1595 | OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); | ||||
1596 | Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); | ||||
1597 | } | ||||
1598 | return Ident; | ||||
1599 | } | ||||
1600 | |||||
  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
  /// \p ReplVal if given.
  ///
  /// One call is kept (or \p ReplVal is used), hoisted to a location that
  /// dominates the function body, and every other call to \p RFI in \p F is
  /// RAUW'd with it and erased. Returns true if any call was removed.
  bool deduplicateRuntimeCalls(Function &F,
                               OMPInformationCache::RuntimeFunctionInfo &RFI,
                               Value *ReplVal = nullptr) {
    auto *UV = RFI.getUseVector(F);
    // Nothing to deduplicate unless there are at least two values in play
    // (two calls, or one call plus a provided replacement).
    if (!UV || UV->size() + (ReplVal != nullptr) < 2)
      return false;

    LLVM_DEBUG(
        dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
               << (ReplVal ? " with an existing value\n" : "\n") << "\n");

    // A caller-provided replacement must be an argument of F itself.
    assert((!ReplVal || (isa<Argument>(ReplVal) &&
                         cast<Argument>(ReplVal)->getParent() == &F)) &&
           "Unexpected replacement value!");

    // TODO: Use dominance to find a good position instead.
    // A call can only be hoisted if its arguments are position-independent:
    // either it has none, or the first is the (constant) ident and none of
    // the remaining ones are instructions computed in the body.
    auto CanBeMoved = [this](CallBase &CB) {
      unsigned NumArgs = CB.arg_size();
      if (NumArgs == 0)
        return true;
      if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
        return false;
      for (unsigned U = 1; U < NumArgs; ++U)
        if (isa<Instruction>(CB.getArgOperand(U)))
          return false;
      return true;
    };

    if (!ReplVal) {
      // Pick the first movable call as the surviving representative.
      for (Use *U : *UV)
        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
          if (!CanBeMoved(*CI))
            continue;

          // If the function is a kernel, dedup will move
          // the runtime call right after the kernel init callsite. Otherwise,
          // it will move it to the beginning of the caller function.
          if (isKernel(F)) {
            auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
            auto *KernelInitUV = KernelInitRFI.getUseVector(F);

            if (KernelInitUV->empty())
              continue;

            assert(KernelInitUV->size() == 1 &&
                   "Expected a single __kmpc_target_init in kernel\n");

            CallInst *KernelInitCI =
                getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
            assert(KernelInitCI &&
                   "Expected a call to __kmpc_target_init in kernel\n");

            CI->moveAfter(KernelInitCI);
          } else
            CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
          ReplVal = CI;
          break;
        }
      // No movable call found: give up rather than hoist something unsafe.
      if (!ReplVal)
        return false;
    }

    // If we use a call as a replacement value we need to make sure the ident is
    // valid at the new location. For now we just pick a global one, either
    // existing and used by one of the calls, or created from scratch.
    if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
      if (!CI->arg_empty() &&
          CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
        Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
                                                      /* GlobalOnly */ true);
        CI->setArgOperand(0, Ident);
      }
    }

    bool Changed = false;
    // Replace every other call (in F only) with the representative and erase
    // it, emitting an optimization remark per removed call.
    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      if (!CI || CI == ReplVal || &F != &Caller)
        return false;
      assert(CI->getCaller() == &F && "Unexpected call!");

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "OpenMP runtime call "
                  << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
      };
      // Attach the remark to the call if it has a location, else to F.
      if (CI->getDebugLoc())
        emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
      else
        emitRemark<OptimizationRemark>(&F, "OMP170", Remark);

      CGUpdater.removeCallSite(*CI);
      CI->replaceAllUsesWith(ReplVal);
      CI->eraseFromParent();
      ++NumOpenMPRuntimeCallsDeduplicated;
      Changed = true;
      return true;
    };
    RFI.foreachUse(SCC, ReplaceAndDeleteCB);

    return Changed;
  }
1704 | |||||
1705 | /// Collect arguments that represent the global thread id in \p GTIdArgs. | ||||
1706 | void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { | ||||
1707 | // TODO: Below we basically perform a fixpoint iteration with a pessimistic | ||||
1708 | // initialization. We could define an AbstractAttribute instead and | ||||
1709 | // run the Attributor here once it can be run as an SCC pass. | ||||
1710 | |||||
1711 | // Helper to check the argument \p ArgNo at all call sites of \p F for | ||||
1712 | // a GTId. | ||||
1713 | auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { | ||||
1714 | if (!F.hasLocalLinkage()) | ||||
1715 | return false; | ||||
1716 | for (Use &U : F.uses()) { | ||||
1717 | if (CallInst *CI = getCallIfRegularCall(U)) { | ||||
1718 | Value *ArgOp = CI->getArgOperand(ArgNo); | ||||
1719 | if (CI == &RefCI || GTIdArgs.count(ArgOp) || | ||||
1720 | getCallIfRegularCall( | ||||
1721 | *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) | ||||
1722 | continue; | ||||
1723 | } | ||||
1724 | return false; | ||||
1725 | } | ||||
1726 | return true; | ||||
1727 | }; | ||||
1728 | |||||
1729 | // Helper to identify uses of a GTId as GTId arguments. | ||||
1730 | auto AddUserArgs = [&](Value >Id) { | ||||
1731 | for (Use &U : GTId.uses()) | ||||
1732 | if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) | ||||
1733 | if (CI->isArgOperand(&U)) | ||||
1734 | if (Function *Callee = CI->getCalledFunction()) | ||||
1735 | if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) | ||||
1736 | GTIdArgs.insert(Callee->getArg(U.getOperandNo())); | ||||
1737 | }; | ||||
1738 | |||||
1739 | // The argument users of __kmpc_global_thread_num calls are GTIds. | ||||
1740 | OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = | ||||
1741 | OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; | ||||
1742 | |||||
1743 | GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { | ||||
1744 | if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) | ||||
1745 | AddUserArgs(*CI); | ||||
1746 | return false; | ||||
1747 | }); | ||||
1748 | |||||
1749 | // Transitively search for more arguments by looking at the users of the | ||||
1750 | // ones we know already. During the search the GTIdArgs vector is extended | ||||
1751 | // so we cannot cache the size nor can we use a range based for. | ||||
1752 | for (unsigned U = 0; U < GTIdArgs.size(); ++U) | ||||
1753 | AddUserArgs(*GTIdArgs[U]); | ||||
1754 | } | ||||
1755 | |||||
/// Kernel (=GPU) optimizations and utility functions
///
///{{

/// Check if \p F is a kernel, hence entry point for target offloading.
bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }

/// Cache to remember the unique kernel for a function. A nullptr entry means
/// "no unique kernel"; an absent Optional means "not computed yet".
DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;

/// Find the unique kernel that will execute \p F, if any.
Kernel getUniqueKernelFor(Function &F);

/// Find the unique kernel that will execute \p I, if any. Convenience
/// wrapper that forwards to the function-level query.
Kernel getUniqueKernelFor(Instruction &I) {
  return getUniqueKernelFor(*I.getFunction());
}

/// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
/// the cases we can avoid taking the address of a function.
bool rewriteDeviceCodeStateMachine();

///
///}}
1780 | |||||
1781 | /// Emit a remark generically | ||||
1782 | /// | ||||
1783 | /// This template function can be used to generically emit a remark. The | ||||
1784 | /// RemarkKind should be one of the following: | ||||
1785 | /// - OptimizationRemark to indicate a successful optimization attempt | ||||
1786 | /// - OptimizationRemarkMissed to report a failed optimization attempt | ||||
1787 | /// - OptimizationRemarkAnalysis to provide additional information about an | ||||
1788 | /// optimization attempt | ||||
1789 | /// | ||||
1790 | /// The remark is built using a callback function provided by the caller that | ||||
1791 | /// takes a RemarkKind as input and returns a RemarkKind. | ||||
1792 | template <typename RemarkKind, typename RemarkCallBack> | ||||
1793 | void emitRemark(Instruction *I, StringRef RemarkName, | ||||
1794 | RemarkCallBack &&RemarkCB) const { | ||||
1795 | Function *F = I->getParent()->getParent(); | ||||
1796 | auto &ORE = OREGetter(F); | ||||
1797 | |||||
1798 | if (RemarkName.startswith("OMP")) | ||||
1799 | ORE.emit([&]() { | ||||
1800 | return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)) | ||||
1801 | << " [" << RemarkName << "]"; | ||||
1802 | }); | ||||
1803 | else | ||||
1804 | ORE.emit( | ||||
1805 | [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)); }); | ||||
1806 | } | ||||
1807 | |||||
1808 | /// Emit a remark on a function. | ||||
1809 | template <typename RemarkKind, typename RemarkCallBack> | ||||
1810 | void emitRemark(Function *F, StringRef RemarkName, | ||||
1811 | RemarkCallBack &&RemarkCB) const { | ||||
1812 | auto &ORE = OREGetter(F); | ||||
1813 | |||||
1814 | if (RemarkName.startswith("OMP")) | ||||
1815 | ORE.emit([&]() { | ||||
1816 | return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)) | ||||
1817 | << " [" << RemarkName << "]"; | ||||
1818 | }); | ||||
1819 | else | ||||
1820 | ORE.emit( | ||||
1821 | [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)); }); | ||||
1822 | } | ||||
1823 | |||||
1824 | /// RAII struct to temporarily change an RTL function's linkage to external. | ||||
1825 | /// This prevents it from being mistakenly removed by other optimizations. | ||||
1826 | struct ExternalizationRAII { | ||||
1827 | ExternalizationRAII(OMPInformationCache &OMPInfoCache, | ||||
1828 | RuntimeFunction RFKind) | ||||
1829 | : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) { | ||||
1830 | if (!Declaration) | ||||
1831 | return; | ||||
1832 | |||||
1833 | LinkageType = Declaration->getLinkage(); | ||||
1834 | Declaration->setLinkage(GlobalValue::ExternalLinkage); | ||||
1835 | } | ||||
1836 | |||||
1837 | ~ExternalizationRAII() { | ||||
1838 | if (!Declaration) | ||||
1839 | return; | ||||
1840 | |||||
1841 | Declaration->setLinkage(LinkageType); | ||||
1842 | } | ||||
1843 | |||||
1844 | Function *Declaration; | ||||
1845 | GlobalValue::LinkageTypes LinkageType; | ||||
1846 | }; | ||||
1847 | |||||
/// The underlying module.
Module &M;

/// The SCC we are operating on.
SmallVectorImpl<Function *> &SCC;

/// Callback to update the call graph, the first argument is a removed call,
/// the second an optional replacement call.
CallGraphUpdater &CGUpdater;

/// Callback to get an OptimizationRemarkEmitter from a Function *.
OptimizationRemarkGetter OREGetter;

/// OpenMP-specific information cache. Also used for Attributor runs.
OMPInformationCache &OMPInfoCache;

/// Attributor instance.
Attributor &A;
1866 | |||||
/// Helper function to run Attributor on SCC.
///
/// Returns true if the Attributor reported a change. \p IsModulePass is
/// forwarded to registerAAs to select the set of abstract attributes.
bool runAttributor(bool IsModulePass) {
  if (SCC.empty())
    return false;

  // Temporarily make these function have external linkage so the Attributor
  // doesn't remove them when we try to look them up later. The RAII objects
  // restore the original linkage when this scope ends.
  ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel);
  ExternalizationRAII EndParallel(OMPInfoCache,
                                  OMPRTL___kmpc_kernel_end_parallel);
  ExternalizationRAII BarrierSPMD(OMPInfoCache,
                                  OMPRTL___kmpc_barrier_simple_spmd);
  ExternalizationRAII BarrierGeneric(OMPInfoCache,
                                     OMPRTL___kmpc_barrier_simple_generic);
  ExternalizationRAII ThreadId(OMPInfoCache,
                               OMPRTL___kmpc_get_hardware_thread_id_in_block);
  ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);

  // Seed the Attributor with abstract attribute opportunities before running.
  registerAAs(IsModulePass);

  ChangeStatus Changed = A.run();

  LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
                    << " functions, result: " << Changed << ".\n");

  return Changed == ChangeStatus::CHANGED;
}
1894 | |||||
/// Register an AA that tries to fold calls to the runtime function \p RF.
void registerFoldRuntimeCall(RuntimeFunction RF);

/// Populate the Attributor with abstract attribute opportunities in the
/// function.
void registerAAs(bool IsModulePass);
1900 | }; | ||||
1901 | |||||
// Find the unique kernel that will execute \p F, if any. Returns nullptr if
// there is none or it is not unique. Results are memoized in UniqueKernelMap.
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
  // Functions outside the module slice are not analyzed.
  if (!OMPInfoCache.ModuleSlice.count(&F))
    return nullptr;

  // Use a scope to keep the lifetime of the CachedKernel short.
  {
    Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
    if (CachedKernel)
      return *CachedKernel;

    // TODO: We should use an AA to create an (optimistic and callback
    //       call-aware) call graph. For now we stick to simple patterns that
    //       are less powerful, basically the worst fixpoint.
    if (isKernel(F)) {
      // A kernel is trivially its own unique kernel.
      CachedKernel = Kernel(&F);
      return *CachedKernel;
    }

    CachedKernel = nullptr;
    if (!F.hasLocalLinkage()) {
      // Externally visible functions may have callers we cannot see, so we
      // must assume there is no unique kernel.
      // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "Potentially unknown OpenMP target region caller.";
      };
      emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);

      return nullptr;
    }
  }

  // Classify a single use of F: return the unique kernel reaching that use,
  // or nullptr if the use could be executed by anything.
  auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
    if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
      // Allow use in equality comparisons.
      if (Cmp->isEquality())
        return getUniqueKernelFor(*Cmp);
      return nullptr;
    }
    if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
      // Allow direct calls.
      if (CB->isCallee(&U))
        return getUniqueKernelFor(*CB);

      OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
          OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
      // Allow the use in __kmpc_parallel_51 calls.
      if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
        return getUniqueKernelFor(*CB);
      return nullptr;
    }
    // Disallow every other use.
    return nullptr;
  };

  // TODO: In the future we want to track more than just a unique kernel.
  SmallPtrSet<Kernel, 2> PotentialKernels;
  OMPInformationCache::foreachUse(F, [&](const Use &U) {
    PotentialKernels.insert(GetUniqueKernelForUse(U));
  });

  // Only a singleton set (which may be {nullptr}) yields a result.
  Kernel K = nullptr;
  if (PotentialKernels.size() == 1)
    K = *PotentialKernels.begin();

  // Cache the result.
  UniqueKernelMap[&F] = K;

  return K;
}
1971 | |||||
// Replace the function-pointer uses of parallel region bodies in
// __kmpc_parallel_51 calls (and equality compares) with a unique global ID
// so the function's address is never taken and only direct calls remain.
bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
  OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];

  bool Changed = false;
  if (!KernelParallelRFI)
    return Changed;

  // If we have disabled state machine changes, exit
  if (DisableOpenMPOptStateMachineRewrite)
    return Changed;

  for (Function *F : SCC) {

    // Check if the function is a use in a __kmpc_parallel_51 call at
    // all.
    bool UnknownUse = false;
    bool KernelParallelUse = false;
    unsigned NumDirectCalls = 0;

    SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
    OMPInformationCache::foreachUse(*F, [&](Use &U) {
      if (auto *CB = dyn_cast<CallBase>(U.getUser()))
        if (CB->isCallee(&U)) {
          ++NumDirectCalls;
          return;
        }

      // Equality compares against F are part of the state machine and will
      // be redirected to the new ID.
      if (isa<ICmpInst>(U.getUser())) {
        ToBeReplacedStateMachineUses.push_back(&U);
        return;
      }

      // Find wrapper functions that represent parallel kernels.
      CallInst *CI =
          OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
      const unsigned int WrapperFunctionArgNo = 6;
      if (!KernelParallelUse && CI &&
          CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
        KernelParallelUse = true;
        ToBeReplacedStateMachineUses.push_back(&U);
        return;
      }
      UnknownUse = true;
    });

    // Do not emit a remark if we haven't seen a __kmpc_parallel_51
    // use.
    if (!KernelParallelUse)
      continue;

    // If this ever hits, we should investigate.
    // TODO: Checking the number of uses is not a necessary restriction and
    //       should be lifted.
    if (UnknownUse || NumDirectCalls != 1 ||
        ToBeReplacedStateMachineUses.size() > 2) {
      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "Parallel region is used in "
                   << (UnknownUse ? "unknown" : "unexpected")
                   << " ways. Will not attempt to rewrite the state machine.";
      };
      emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
      continue;
    }

    // Even if we have __kmpc_parallel_51 calls, we (for now) give
    // up if the function is not called from a unique kernel.
    Kernel K = getUniqueKernelFor(*F);
    if (!K) {
      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "Parallel region is not called from a unique kernel. "
                      "Will not attempt to rewrite the state machine.";
      };
      emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
      continue;
    }

    // We now know F is a parallel body function called only from the kernel
    // K. We also identified the state machine uses in which we replace the
    // function pointer by a new global symbol for identification purposes.
    // This ensures only direct calls to the function are left.

    Module &M = *F->getParent();
    Type *Int8Ty = Type::getInt8Ty(M.getContext());

    // The ID only needs a unique address; its (undef) content is never read.
    auto *ID = new GlobalVariable(
        M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
        UndefValue::get(Int8Ty), F->getName() + ".ID");

    for (Use *U : ToBeReplacedStateMachineUses)
      U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
          ID, U->get()->getType()));

    ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;

    Changed = true;
  }

  return Changed;
}
2072 | |||||
/// Abstract Attribute for tracking ICV values.
struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Give up immediately if the anchor scope is missing or not amendable to
  /// IPO, since we cannot reason about ICV values then.
  void initialize(Attributor &A) override {
    Function *F = getAnchorScope();
    if (!F || !A.isFunctionIPOAmendable(*F))
      indicatePessimisticFixpoint();
  }

  /// Returns true if value is assumed to be tracked.
  bool isAssumedTracked() const { return getAssumed(); }

  /// Returns true if value is known to be tracked.
  bool isKnownTracked() const { return getAssumed(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);

  /// Return the value with which \p I can be replaced for specific \p ICV.
  /// The default implementation knows nothing and returns None.
  virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                                const Instruction *I,
                                                Attributor &A) const {
    return None;
  }

  /// Return an assumed unique ICV value if a single candidate is found. If
  /// there cannot be one, return a nullptr. If it is not clear yet, return
  /// the Optional::NoneType.
  virtual Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const = 0;

  // Currently only nthreads is being tracked.
  // this array will only grow with time.
  InternalControlVar TrackableICVs[1] = {ICV_nthreads};

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAICVTracker"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAICVTracker
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
2123 | |||||
/// ICV tracking on a function: records, per ICV, every program point at
/// which the ICV value is (potentially) changed and what the new value is.
struct AAICVTrackerFunction : public AAICVTracker {
  AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with better string.
  const std::string getAsStr() const override { return "ICVTrackerFunction"; }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  // Map of ICV to their values at specific program point. A nullptr value
  // means the ICV value is unknown after that instruction.
  EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus HasChanged = ChangeStatus::UNCHANGED;

    Function *F = getAnchorScope();

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    for (InternalControlVar ICV : TrackableICVs) {
      auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];

      auto &ValuesMap = ICVReplacementValuesMap[ICV];
      // Record the value a setter call establishes (its first argument).
      auto TrackValues = [&](Use &U, Function &) {
        CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
        if (!CI)
          return false;

        // FIXME: handle setters with more than 1 argument.
        /// Track new value.
        if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
          HasChanged = ChangeStatus::CHANGED;

        return false;
      };

      // Record the effect of arbitrary calls on the ICV, if any.
      auto CallCheck = [&](Instruction &I) {
        Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
        if (ReplVal.hasValue() &&
            ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
          HasChanged = ChangeStatus::CHANGED;

        return true;
      };

      // Track all changes of an ICV.
      SetterRFI.foreachUse(TrackValues, F);

      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
                                UsedAssumedInformation,
                                /* CheckBBLivenessOnly */ true);

      /// TODO: Figure out a way to avoid adding entry in
      ///       ICVReplacementValuesMap
      Instruction *Entry = &F->getEntryBlock().front();
      if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
        ValuesMap.insert(std::make_pair(Entry, nullptr));
    }

    return HasChanged;
  }

  /// Helper to check if \p I is a call and get the value for it if it is
  /// unique. Returns None if the call cannot affect the ICV, nullptr if it
  /// may change it to an unknown value, and a concrete value otherwise.
  Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
                                    InternalControlVar &ICV) const {

    const auto *CB = dyn_cast<CallBase>(I);
    // Calls carrying "no_openmp"(_routines) assumptions cannot change ICVs.
    if (!CB || CB->hasFnAttr("no_openmp") ||
        CB->hasFnAttr("no_openmp_routines"))
      return None;

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
    auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
    Function *CalledFunction = CB->getCalledFunction();

    // Indirect call, assume ICV changes.
    if (CalledFunction == nullptr)
      return nullptr;
    // The getter only reads the ICV.
    if (CalledFunction == GetterRFI.Declaration)
      return None;
    if (CalledFunction == SetterRFI.Declaration) {
      if (ICVReplacementValuesMap[ICV].count(I))
        return ICVReplacementValuesMap[ICV].lookup(I);

      return nullptr;
    }

    // Since we don't know, assume it changes the ICV.
    if (CalledFunction->isDeclaration())
      return nullptr;

    // Ask the callee's tracker what the ICV is after the call returns.
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);

    if (ICVTrackingAA.isAssumedTracked())
      return ICVTrackingAA.getUniqueReplacementValue(ICV);

    // If we don't know, assume it changes.
    return nullptr;
  }

  // We don't check unique value for a function, so return None.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return None;
  }

  /// Return the value with which \p I can be replaced for specific \p ICV.
  /// Performs a backwards walk from \p I over predecessors, merging the ICV
  /// values found on all paths.
  Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                        const Instruction *I,
                                        Attributor &A) const override {
    const auto &ValuesMap = ICVReplacementValuesMap[ICV];
    if (ValuesMap.count(I))
      return ValuesMap.lookup(I);

    SmallVector<const Instruction *, 16> Worklist;
    SmallPtrSet<const Instruction *, 16> Visited;
    Worklist.push_back(I);

    Optional<Value *> ReplVal;

    while (!Worklist.empty()) {
      const Instruction *CurrInst = Worklist.pop_back_val();
      if (!Visited.insert(CurrInst).second)
        continue;

      const BasicBlock *CurrBB = CurrInst->getParent();

      // Go up and look for all potential setters/calls that might change the
      // ICV.
      while ((CurrInst = CurrInst->getPrevNode())) {
        if (ValuesMap.count(CurrInst)) {
          Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
          // Unknown value, track new.
          if (!ReplVal.hasValue()) {
            ReplVal = NewReplVal;
            break;
          }

          // If we found a new value, we can't know the icv value anymore.
          if (NewReplVal.hasValue())
            if (ReplVal != NewReplVal)
              return nullptr;

          break;
        }

        Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
        if (!NewReplVal.hasValue())
          continue;

        // Unknown value, track new.
        if (!ReplVal.hasValue()) {
          ReplVal = NewReplVal;
          break;
        }

        // if (NewReplVal.hasValue())
        // We found a new value, we can't know the icv value anymore.
        if (ReplVal != NewReplVal)
          return nullptr;
      }

      // If we are in the same BB and we have a value, we are done.
      if (CurrBB == I->getParent() && ReplVal.hasValue())
        return ReplVal;

      // Go through all predecessors and add terminators for analysis.
      for (const BasicBlock *Pred : predecessors(CurrBB))
        if (const Instruction *Terminator = Pred->getTerminator())
          Worklist.push_back(Terminator);
    }

    return ReplVal;
  }
};
2311 | |||||
/// ICV tracking for the value returned by a function: per ICV, the unique
/// value the ICV has at every return instruction, if one exists.
struct AAICVTrackerFunctionReturned : AAICVTracker {
  AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with better string.
  const std::string getAsStr() const override {
    return "ICVTrackerFunctionReturned";
  }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  // Map of ICV to their values at specific program point. None means "not
  // determined yet", nullptr means "no unique value".
  EnumeratedArray<Optional<Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  /// Return the value with which \p I can be replaced for specific \p ICV.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ICVReplacementValuesMap[ICV];
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    // Reuse the function-level tracking to query values at each return.
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);

    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    for (InternalControlVar ICV : TrackableICVs) {
      Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
      Optional<Value *> UniqueICVValue;

      auto CheckReturnInst = [&](Instruction &I) {
        Optional<Value *> NewReplVal =
            ICVTrackingAA.getReplacementValue(ICV, &I, A);

        // If we found a second ICV value there is no unique returned value.
        if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
          return false;

        UniqueICVValue = NewReplVal;

        return true;
      };

      bool UsedAssumedInformation = false;
      // If not all returns could be visited, give up on a unique value.
      if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
                                     UsedAssumedInformation,
                                     /* CheckBBLivenessOnly */ true))
        UniqueICVValue = nullptr;

      if (UniqueICVValue == ReplVal)
        continue;

      ReplVal = UniqueICVValue;
      Changed = ChangeStatus::CHANGED;
    }

    return Changed;
  }
};
2381 | |||||
2382 | struct AAICVTrackerCallSite : AAICVTracker { | ||||
2383 | AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) | ||||
2384 | : AAICVTracker(IRP, A) {} | ||||
2385 | |||||
2386 | void initialize(Attributor &A) override { | ||||
2387 | Function *F = getAnchorScope(); | ||||
2388 | if (!F || !A.isFunctionIPOAmendable(*F)) | ||||
2389 | indicatePessimisticFixpoint(); | ||||
2390 | |||||
2391 | // We only initialize this AA for getters, so we need to know which ICV it | ||||
2392 | // gets. | ||||
2393 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
2394 | for (InternalControlVar ICV : TrackableICVs) { | ||||
2395 | auto ICVInfo = OMPInfoCache.ICVs[ICV]; | ||||
2396 | auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; | ||||
2397 | if (Getter.Declaration == getAssociatedFunction()) { | ||||
2398 | AssociatedICV = ICVInfo.Kind; | ||||
2399 | return; | ||||
2400 | } | ||||
2401 | } | ||||
2402 | |||||
2403 | /// Unknown ICV. | ||||
2404 | indicatePessimisticFixpoint(); | ||||
2405 | } | ||||
2406 | |||||
2407 | ChangeStatus manifest(Attributor &A) override { | ||||
2408 | if (!ReplVal.hasValue() || !ReplVal.getValue()) | ||||
2409 | return ChangeStatus::UNCHANGED; | ||||
2410 | |||||
2411 | A.changeValueAfterManifest(*getCtxI(), **ReplVal); | ||||
2412 | A.deleteAfterManifest(*getCtxI()); | ||||
2413 | |||||
2414 | return ChangeStatus::CHANGED; | ||||
2415 | } | ||||
2416 | |||||
2417 | // FIXME: come up with better string. | ||||
2418 | const std::string getAsStr() const override { return "ICVTrackerCallSite"; } | ||||
2419 | |||||
2420 | // FIXME: come up with some stats. | ||||
2421 | void trackStatistics() const override {} | ||||
2422 | |||||
2423 | InternalControlVar AssociatedICV; | ||||
2424 | Optional<Value *> ReplVal; | ||||
2425 | |||||
  ChangeStatus updateImpl(Attributor &A) override {
    // Ask the function-level ICV tracker of the surrounding function; it
    // knows the ICV values at individual program points.
    const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);

    // We don't have any information, so we assume it changes the ICV.
    if (!ICVTrackingAA.isAssumedTracked())
      return indicatePessimisticFixpoint();

    // Value of the associated ICV right before this call site, if known.
    Optional<Value *> NewReplVal =
        ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);

    // Nothing learned this round; tell the Attributor the state is stable.
    if (ReplVal == NewReplVal)
      return ChangeStatus::UNCHANGED;

    ReplVal = NewReplVal;
    return ChangeStatus::CHANGED;
  }
2443 | |||||
  // Return the value with which associated value can be replaced for specific
  // \p ICV.
  // NOTE: the \p ICV parameter is ignored here; this call-site AA tracks a
  // single ICV (AssociatedICV) and ReplVal already refers to it.
  Optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return ReplVal;
  }
2450 | }; | ||||
2451 | |||||
2452 | struct AAICVTrackerCallSiteReturned : AAICVTracker { | ||||
2453 | AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) | ||||
2454 | : AAICVTracker(IRP, A) {} | ||||
2455 | |||||
2456 | // FIXME: come up with better string. | ||||
2457 | const std::string getAsStr() const override { | ||||
2458 | return "ICVTrackerCallSiteReturned"; | ||||
2459 | } | ||||
2460 | |||||
2461 | // FIXME: come up with some stats. | ||||
2462 | void trackStatistics() const override {} | ||||
2463 | |||||
2464 | /// We don't manifest anything for this AA. | ||||
2465 | ChangeStatus manifest(Attributor &A) override { | ||||
2466 | return ChangeStatus::UNCHANGED; | ||||
2467 | } | ||||
2468 | |||||
2469 | // Map of ICV to their values at specific program point. | ||||
2470 | EnumeratedArray<Optional<Value *>, InternalControlVar, | ||||
2471 | InternalControlVar::ICV___last> | ||||
2472 | ICVReplacementValuesMap; | ||||
2473 | |||||
2474 | /// Return the value with which associated value can be replaced for specific | ||||
2475 | /// \p ICV. | ||||
2476 | Optional<Value *> | ||||
2477 | getUniqueReplacementValue(InternalControlVar ICV) const override { | ||||
2478 | return ICVReplacementValuesMap[ICV]; | ||||
2479 | } | ||||
2480 | |||||
2481 | ChangeStatus updateImpl(Attributor &A) override { | ||||
2482 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | ||||
2483 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | ||||
2484 | *this, IRPosition::returned(*getAssociatedFunction()), | ||||
2485 | DepClassTy::REQUIRED); | ||||
2486 | |||||
2487 | // We don't have any information, so we assume it changes the ICV. | ||||
2488 | if (!ICVTrackingAA.isAssumedTracked()) | ||||
2489 | return indicatePessimisticFixpoint(); | ||||
2490 | |||||
2491 | for (InternalControlVar ICV : TrackableICVs) { | ||||
2492 | Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; | ||||
2493 | Optional<Value *> NewReplVal = | ||||
2494 | ICVTrackingAA.getUniqueReplacementValue(ICV); | ||||
2495 | |||||
2496 | if (ReplVal == NewReplVal) | ||||
2497 | continue; | ||||
2498 | |||||
2499 | ReplVal = NewReplVal; | ||||
2500 | Changed = ChangeStatus::CHANGED; | ||||
2501 | } | ||||
2502 | return Changed; | ||||
2503 | } | ||||
2504 | }; | ||||
2505 | |||||
struct AAExecutionDomainFunction : public AAExecutionDomain {
  AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
      : AAExecutionDomain(IRP, A) {}

  const std::string getAsStr() const override {
    return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
           "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}

  // Optimistically assume every basic block is executed only by the initial
  // thread; updateImpl() removes blocks that cannot be proven single-threaded.
  void initialize(Attributor &A) override {
    Function *F = getAnchorScope();
    for (const auto &BB : *F)
      SingleThreadedBBs.insert(&BB);
    NumBBs = SingleThreadedBBs.size();
  }

  // No IR is changed here; this AA only provides information to others and
  // prints its final findings for debugging.
  ChangeStatus manifest(Attributor &A) override {
    LLVM_DEBUG({
      for (const BasicBlock *BB : SingleThreadedBBs)
        dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
               << BB->getName() << " is executed by a single thread.\n";
    });
    return ChangeStatus::UNCHANGED;
  }

  ChangeStatus updateImpl(Attributor &A) override;

  /// Check if an instruction is executed by a single thread.
  bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
    return isExecutedByInitialThreadOnly(*I.getParent());
  }

  bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
    return isValidState() && SingleThreadedBBs.contains(&BB);
  }

  /// Set of basic blocks that are executed by a single thread.
  SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;

  /// Total number of basic blocks in this function.
  long unsigned NumBBs;
};
2551 | |||||
// Forward data-flow: a block stays in SingleThreadedBBs only if all of its
// predecessors are single-threaded or the incoming edge is guarded by an
// "am I the initial thread?" check.
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
  Function *F = getAnchorScope();
  ReversePostOrderTraversal<Function *> RPOT(F);
  // Remember the size so we can report CHANGED/UNCHANGED at the end.
  auto NumSingleThreadedBBs = SingleThreadedBBs.size();

  bool AllCallSitesKnown;
  // The entry block is single-threaded only if every (direct) caller invokes
  // this function from a single-threaded context.
  auto PredForCallSite = [&](AbstractCallSite ACS) {
    const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
        *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
        DepClassTy::REQUIRED);
    return ACS.isDirectCall() &&
           ExecutionDomainAA.isExecutedByInitialThreadOnly(
               *ACS.getInstruction());
  };

  if (!A.checkForAllCallSites(PredForCallSite, *this,
                              /* RequiresAllCallSites */ true,
                              AllCallSitesKnown))
    SingleThreadedBBs.remove(&F->getEntryBlock());

  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];

  // Check if the edge into the successor block contains a condition that only
  // lets the main thread execute it.
  auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
    if (!Edge || !Edge->isConditional())
      return false;
    // Only the true-successor is guarded by the comparison below.
    if (Edge->getSuccessor(0) != SuccessorBB)
      return false;

    auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
    if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
      return false;

    ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
    if (!C)
      return false;

    // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
    if (C->isAllOnesValue()) {
      auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
      CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
      if (!CB)
        return false;
      const int InitModeArgNo = 1;
      auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
      return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
    }

    if (C->isZero()) {
      // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
      if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
        if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
          return true;

      // Match: 0 == llvm.amdgcn.workitem.id.x()
      if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
        if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
          return true;
    }

    return false;
  };

  // Merge all the predecessor states into the current basic block. A basic
  // block is executed by a single thread if all of its predecessors are.
  auto MergePredecessorStates = [&](BasicBlock *BB) {
    if (pred_empty(BB))
      return SingleThreadedBBs.contains(BB);

    bool IsInitialThread = true;
    for (BasicBlock *PredBB : predecessors(BB)) {
      // Edges guarded by an initial-thread check do not taint the successor.
      if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
                               BB))
        IsInitialThread &= SingleThreadedBBs.contains(PredBB);
    }

    return IsInitialThread;
  };

  for (auto *BB : RPOT) {
    if (!MergePredecessorStates(BB))
      SingleThreadedBBs.remove(BB);
  }

  return (NumSingleThreadedBBs == SingleThreadedBBs.size())
             ? ChangeStatus::UNCHANGED
             : ChangeStatus::CHANGED;
}
2642 | |||||
/// Try to replace memory allocation calls called by a single thread with a
/// static buffer of shared memory.
struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAHeapToShared &createForPosition(const IRPosition &IRP,
                                           Attributor &A);

  /// Returns true if HeapToShared conversion is assumed to be possible.
  virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;

  /// Returns true if HeapToShared conversion is assumed and the CB is a
  /// callsite to a free operation to be removed.
  virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAHeapToShared"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAHeapToShared.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
2675 | |||||
2676 | struct AAHeapToSharedFunction : public AAHeapToShared { | ||||
2677 | AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A) | ||||
2678 | : AAHeapToShared(IRP, A) {} | ||||
2679 | |||||
2680 | const std::string getAsStr() const override { | ||||
2681 | return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) + | ||||
2682 | " malloc calls eligible."; | ||||
2683 | } | ||||
2684 | |||||
2685 | /// See AbstractAttribute::trackStatistics(). | ||||
2686 | void trackStatistics() const override {} | ||||
2687 | |||||
2688 | /// This functions finds free calls that will be removed by the | ||||
2689 | /// HeapToShared transformation. | ||||
2690 | void findPotentialRemovedFreeCalls(Attributor &A) { | ||||
2691 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
2692 | auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; | ||||
2693 | |||||
2694 | PotentialRemovedFreeCalls.clear(); | ||||
2695 | // Update free call users of found malloc calls. | ||||
2696 | for (CallBase *CB : MallocCalls) { | ||||
2697 | SmallVector<CallBase *, 4> FreeCalls; | ||||
2698 | for (auto *U : CB->users()) { | ||||
2699 | CallBase *C = dyn_cast<CallBase>(U); | ||||
2700 | if (C && C->getCalledFunction() == FreeRFI.Declaration) | ||||
2701 | FreeCalls.push_back(C); | ||||
2702 | } | ||||
2703 | |||||
2704 | if (FreeCalls.size() != 1) | ||||
2705 | continue; | ||||
2706 | |||||
2707 | PotentialRemovedFreeCalls.insert(FreeCalls.front()); | ||||
2708 | } | ||||
2709 | } | ||||
2710 | |||||
2711 | void initialize(Attributor &A) override { | ||||
2712 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
2713 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | ||||
2714 | |||||
2715 | for (User *U : RFI.Declaration->users()) | ||||
2716 | if (CallBase *CB = dyn_cast<CallBase>(U)) | ||||
2717 | MallocCalls.insert(CB); | ||||
2718 | |||||
2719 | findPotentialRemovedFreeCalls(A); | ||||
2720 | } | ||||
2721 | |||||
2722 | bool isAssumedHeapToShared(CallBase &CB) const override { | ||||
2723 | return isValidState() && MallocCalls.count(&CB); | ||||
2724 | } | ||||
2725 | |||||
2726 | bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override { | ||||
2727 | return isValidState() && PotentialRemovedFreeCalls.count(&CB); | ||||
2728 | } | ||||
2729 | |||||
2730 | ChangeStatus manifest(Attributor &A) override { | ||||
2731 | if (MallocCalls.empty()) | ||||
| |||||
2732 | return ChangeStatus::UNCHANGED; | ||||
2733 | |||||
2734 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
2735 | auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; | ||||
2736 | |||||
2737 | Function *F = getAnchorScope(); | ||||
2738 | auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this, | ||||
2739 | DepClassTy::OPTIONAL); | ||||
2740 | |||||
2741 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | ||||
2742 | for (CallBase *CB : MallocCalls) { | ||||
2743 | // Skip replacing this if HeapToStack has already claimed it. | ||||
2744 | if (HS
| ||||
2745 | continue; | ||||
2746 | |||||
2747 | // Find the unique free call to remove it. | ||||
2748 | SmallVector<CallBase *, 4> FreeCalls; | ||||
2749 | for (auto *U : CB->users()) { | ||||
2750 | CallBase *C = dyn_cast<CallBase>(U); | ||||
2751 | if (C && C->getCalledFunction() == FreeCall.Declaration) | ||||
2752 | FreeCalls.push_back(C); | ||||
2753 | } | ||||
2754 | if (FreeCalls.size() != 1) | ||||
2755 | continue; | ||||
2756 | |||||
2757 | ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0)); | ||||
2758 | |||||
2759 | LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue () << " bytes of shared memory\n"; } } while (false) | ||||
2760 | << " with " << AllocSize->getZExtValue()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue () << " bytes of shared memory\n"; } } while (false) | ||||
2761 | << " bytes of shared memory\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue () << " bytes of shared memory\n"; } } while (false); | ||||
2762 | |||||
2763 | // Create a new shared memory buffer of the same size as the allocation | ||||
2764 | // and replace all the uses of the original allocation with it. | ||||
2765 | Module *M = CB->getModule(); | ||||
2766 | Type *Int8Ty = Type::getInt8Ty(M->getContext()); | ||||
2767 | Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); | ||||
| |||||
2768 | auto *SharedMem = new GlobalVariable( | ||||
2769 | *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, | ||||
2770 | UndefValue::get(Int8ArrTy), CB->getName(), nullptr, | ||||
2771 | GlobalValue::NotThreadLocal, | ||||
2772 | static_cast<unsigned>(AddressSpace::Shared)); | ||||
2773 | auto *NewBuffer = | ||||
2774 | ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); | ||||
2775 | |||||
2776 | auto Remark = [&](OptimizationRemark OR) { | ||||
2777 | return OR << "Replaced globalized variable with " | ||||
2778 | << ore::NV("SharedMemory", AllocSize->getZExtValue()) | ||||
2779 | << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") | ||||
2780 | << "of shared memory."; | ||||
2781 | }; | ||||
2782 | A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark); | ||||
2783 | |||||
2784 | MaybeAlign Alignment = CB->getRetAlign(); | ||||
2785 | assert(Alignment &&(static_cast <bool> (Alignment && "HeapToShared on allocation without alignment attribute" ) ? void (0) : __assert_fail ("Alignment && \"HeapToShared on allocation without alignment attribute\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 2786, __extension__ __PRETTY_FUNCTION__)) | ||||
2786 | "HeapToShared on allocation without alignment attribute")(static_cast <bool> (Alignment && "HeapToShared on allocation without alignment attribute" ) ? void (0) : __assert_fail ("Alignment && \"HeapToShared on allocation without alignment attribute\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 2786, __extension__ __PRETTY_FUNCTION__)); | ||||
2787 | SharedMem->setAlignment(MaybeAlign(Alignment)); | ||||
2788 | |||||
2789 | A.changeValueAfterManifest(*CB, *NewBuffer); | ||||
2790 | A.deleteAfterManifest(*CB); | ||||
2791 | A.deleteAfterManifest(*FreeCalls.front()); | ||||
2792 | |||||
2793 | NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); | ||||
2794 | Changed = ChangeStatus::CHANGED; | ||||
2795 | } | ||||
2796 | |||||
2797 | return Changed; | ||||
2798 | } | ||||
2799 | |||||
2800 | ChangeStatus updateImpl(Attributor &A) override { | ||||
2801 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
2802 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | ||||
2803 | Function *F = getAnchorScope(); | ||||
2804 | |||||
2805 | auto NumMallocCalls = MallocCalls.size(); | ||||
2806 | |||||
2807 | // Only consider malloc calls executed by a single thread with a constant. | ||||
2808 | for (User *U : RFI.Declaration->users()) { | ||||
2809 | const auto &ED = A.getAAFor<AAExecutionDomain>( | ||||
2810 | *this, IRPosition::function(*F), DepClassTy::REQUIRED); | ||||
2811 | if (CallBase *CB = dyn_cast<CallBase>(U)) | ||||
2812 | if (!isa<ConstantInt>(CB->getArgOperand(0)) || | ||||
2813 | !ED.isExecutedByInitialThreadOnly(*CB)) | ||||
2814 | MallocCalls.remove(CB); | ||||
2815 | } | ||||
2816 | |||||
2817 | findPotentialRemovedFreeCalls(A); | ||||
2818 | |||||
2819 | if (NumMallocCalls != MallocCalls.size()) | ||||
2820 | return ChangeStatus::CHANGED; | ||||
2821 | |||||
2822 | return ChangeStatus::UNCHANGED; | ||||
2823 | } | ||||
2824 | |||||
2825 | /// Collection of all malloc calls in a function. | ||||
2826 | SmallSetVector<CallBase *, 4> MallocCalls; | ||||
2827 | /// Collection of potentially removed free calls in a function. | ||||
2828 | SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls; | ||||
2829 | }; | ||||
2830 | |||||
// Abstract attribute carrying the KernelInfoState for a kernel or a function
// reachable from one (SPMD compatibility, reached parallel regions, reaching
// kernel entries).
struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
  using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
  AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Statistics are tracked as part of manifest for now.
  void trackStatistics() const override {}

  /// See AbstractAttribute::getAsStr()
  const std::string getAsStr() const override {
    if (!isValidState())
      return "<invalid>";
    // Summarize mode (SPMD/generic), fixpoint status, and the three tracked
    // sets; sub-states that became invalid are shown as "<invalid>".
    return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
                                                            : "generic") +
           std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
                                                               : "") +
           std::string(" #PRs: ") +
           (ReachedKnownParallelRegions.isValidState()
                ? std::to_string(ReachedKnownParallelRegions.size())
                : "<invalid>") +
           ", #Unknown PRs: " +
           (ReachedUnknownParallelRegions.isValidState()
                ? std::to_string(ReachedUnknownParallelRegions.size())
                : "<invalid>") +
           ", #Reaching Kernels: " +
           (ReachingKernelEntries.isValidState()
                ? std::to_string(ReachingKernelEntries.size())
                : "<invalid>");
  }

  /// Create an abstract attribute biew for the position \p IRP.
  static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAKernelInfo"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AAKernelInfo
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
2876 | |||||
2877 | /// The function kernel info abstract attribute, basically, what can we say | ||||
2878 | /// about a function with regards to the KernelInfoState. | ||||
2879 | struct AAKernelInfoFunction : AAKernelInfo { | ||||
  AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
      : AAKernelInfo(IRP, A) {}

  // Instructions that will be executed under an initial-thread guard when the
  // kernel is converted to SPMD mode.
  SmallPtrSet<Instruction *, 4> GuardedInstructions;

  SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
    return GuardedInstructions;
  }
2888 | |||||
  /// See AbstractAttribute::initialize(...).
  void initialize(Attributor &A) override {
    // This is a high-level transform that might change the constant arguments
    // of the init and dinit calls. We need to tell the Attributor about this
    // to avoid other parts using the current constant value for simpliication.
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    Function *Fn = getAnchorScope();
    // Only functions registered as kernel entries get the treatment below.
    if (!OMPInfoCache.Kernels.count(Fn))
      return;

    // Add itself to the reaching kernel and set IsKernelEntry.
    ReachingKernelEntries.insert(Fn);
    IsKernelEntry = true;

    OMPInformationCache::RuntimeFunctionInfo &InitRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
    OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];

    // For kernels we perform more initialization work, first we find the init
    // and deinit calls.
    auto StoreCallBase = [](Use &U,
                            OMPInformationCache::RuntimeFunctionInfo &RFI,
                            CallBase *&Storage) {
      CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
      assert(CB &&
             "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
      assert(!Storage &&
             "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
      Storage = CB;
      return false;
    };
    InitRFI.foreachUse(
        [&](Use &U, Function &) {
          StoreCallBase(U, InitRFI, KernelInitCB);
          return false;
        },
        Fn);
    DeinitRFI.foreachUse(
        [&](Use &U, Function &) {
          StoreCallBase(U, DeinitRFI, KernelDeinitCB);
          return false;
        },
        Fn);

    // Ignore kernels without initializers such as global constructors.
    if (!KernelInitCB || !KernelDeinitCB) {
      indicateOptimisticFixpoint();
      return;
    }

    // For kernels we might need to initialize/finalize the IsSPMD state and
    // we need to register a simplification callback so that the Attributor
    // knows the constant arguments to __kmpc_target_init and
    // __kmpc_target_deinit might actually change.

    Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
        [&](const IRPosition &IRP, const AbstractAttribute *AA,
            bool &UsedAssumedInformation) -> Optional<Value *> {
      // IRP represents the "use generic state machine" argument of an
      // __kmpc_target_init call. We will answer this one with the internal
      // state. As long as we are not in an invalid state, we will create a
      // custom state machine so the value should be a `i1 false`. If we are
      // in an invalid state, we won't change the value that is in the IR.
      if (!ReachedKnownParallelRegions.isValidState())
        return nullptr;
      // If we have disabled state machine rewrites, don't make a custom one.
      if (DisableOpenMPOptStateMachineRewrite)
        return nullptr;
      if (AA)
        A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
      // The answer is only "assumed" until this AA reaches a fixpoint.
      UsedAssumedInformation = !isAtFixpoint();
      auto *FalseVal =
          ConstantInt::getBool(IRP.getAnchorValue().getContext(), false);
      return FalseVal;
    };

    Attributor::SimplifictionCallbackTy ModeSimplifyCB =
        [&](const IRPosition &IRP, const AbstractAttribute *AA,
            bool &UsedAssumedInformation) -> Optional<Value *> {
      // IRP represents the "SPMDCompatibilityTracker" argument of an
      // __kmpc_target_init or
      // __kmpc_target_deinit call. We will answer this one with the internal
      // state.
      if (!SPMDCompatibilityTracker.isValidState())
        return nullptr;
      if (!SPMDCompatibilityTracker.isAtFixpoint()) {
        if (AA)
          A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
        UsedAssumedInformation = true;
      } else {
        UsedAssumedInformation = false;
      }
      auto *Val = ConstantInt::getSigned(
          IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
          SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
                                               : OMP_TGT_EXEC_MODE_GENERIC);
      return Val;
    };

    Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
        [&](const IRPosition &IRP, const AbstractAttribute *AA,
            bool &UsedAssumedInformation) -> Optional<Value *> {
      // IRP represents the "RequiresFullRuntime" argument of an
      // __kmpc_target_init or __kmpc_target_deinit call. We will answer this
      // one with the internal state of the SPMDCompatibilityTracker, so if
      // generic then true, if SPMD then false.
      if (!SPMDCompatibilityTracker.isValidState())
        return nullptr;
      if (!SPMDCompatibilityTracker.isAtFixpoint()) {
        if (AA)
          A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
        UsedAssumedInformation = true;
      } else {
        UsedAssumedInformation = false;
      }
      auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
                                       !SPMDCompatibilityTracker.isAssumed());
      return Val;
    };

    // Argument positions of the __kmpc_target_init/_deinit calls the
    // callbacks above are registered for.
    constexpr const int InitModeArgNo = 1;
    constexpr const int DeinitModeArgNo = 1;
    constexpr const int InitUseStateMachineArgNo = 2;
    constexpr const int InitRequiresFullRuntimeArgNo = 3;
    constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
    A.registerSimplificationCallback(
        IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
        StateMachineSimplifyCB);
    A.registerSimplificationCallback(
        IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
        ModeSimplifyCB);
    A.registerSimplificationCallback(
        IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
        ModeSimplifyCB);
    A.registerSimplificationCallback(
        IRPosition::callsite_argument(*KernelInitCB,
                                      InitRequiresFullRuntimeArgNo),
        IsGenericModeSimplifyCB);
    A.registerSimplificationCallback(
        IRPosition::callsite_argument(*KernelDeinitCB,
                                      DeinitRequiresFullRuntimeArgNo),
        IsGenericModeSimplifyCB);

    // Check if we know we are in SPMD-mode already.
    ConstantInt *ModeArg =
        dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
    if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
      SPMDCompatibilityTracker.indicateOptimisticFixpoint();
    // This is a generic region but SPMDization is disabled so stop tracking.
    else if (DisableOpenMPOptSPMDization)
      SPMDCompatibilityTracker.indicatePessimisticFixpoint();
  }
3043 | |||||
3044 | /// Sanitize the string \p S such that it is a suitable global symbol name. | ||||
3045 | static std::string sanitizeForGlobalName(std::string S) { | ||||
3046 | std::replace_if( | ||||
3047 | S.begin(), S.end(), | ||||
3048 | [](const char C) { | ||||
3049 | return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || | ||||
3050 | (C >= '0' && C <= '9') || C == '_'); | ||||
3051 | }, | ||||
3052 | '.'); | ||||
3053 | return S; | ||||
3054 | } | ||||
3055 | |||||
3056 | /// Modify the IR based on the KernelInfoState as the fixpoint iteration is | ||||
3057 | /// finished now. | ||||
3058 | ChangeStatus manifest(Attributor &A) override { | ||||
3059 | // If we are not looking at a kernel with __kmpc_target_init and | ||||
3060 | // __kmpc_target_deinit call we cannot actually manifest the information. | ||||
3061 | if (!KernelInitCB || !KernelDeinitCB) | ||||
3062 | return ChangeStatus::UNCHANGED; | ||||
3063 | |||||
3064 | // If we can we change the execution mode to SPMD-mode otherwise we build a | ||||
3065 | // custom state machine. | ||||
3066 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | ||||
3067 | if (!changeToSPMDMode(A, Changed)) | ||||
3068 | return buildCustomStateMachine(A); | ||||
3069 | |||||
3070 | return Changed; | ||||
3071 | } | ||||
3072 | |||||
  /// Try to convert this generic-mode kernel to SPMD-mode execution.
  ///
  /// Returns false — leaving \p Changed untouched — when the
  /// SPMDCompatibilityTracker is in a pessimistic state, i.e., side effects
  /// were found that cannot be guarded. Returns true when the kernel either
  /// already runs in a non-generic mode or was rewritten: incompatible
  /// instructions are wrapped in main-thread-only guarded regions, the global
  /// `<kernel>_exec_mode` flag is flipped to Generic-SPMD, and the
  /// __kmpc_target_init/_deinit arguments are rewritten for SPMD-mode, in
  /// which case \p Changed is set to CHANGED.
  bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    // A pessimistic tracker state means SPMDization is impossible; emit a
    // remark for each offending instruction so the user can act, then bail.
    if (!SPMDCompatibilityTracker.isAssumed()) {
      for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
        if (!NonCompatibleI)
          continue;

        // Skip diagnostics on calls to known OpenMP runtime functions for now.
        if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
          if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
            continue;

        auto Remark = [&](OptimizationRemarkAnalysis ORA) {
          ORA << "Value has potential side effects preventing SPMD-mode "
                 "execution";
          if (isa<CallBase>(NonCompatibleI)) {
            ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to "
                   "the called function to override";
          }
          return ORA << ".";
        };
        A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
                                                 Remark);

        LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
                          << *NonCompatibleI << "\n");
      }

      return false;
    }

    // Check if the kernel is already in SPMD mode, if so, return success.
    Function *Kernel = getAnchorScope();
    GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
        (Kernel->getName() + "_exec_mode").str());
    assert(ExecMode && "Kernel without exec mode?");
    assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");

    // Set the global exec mode flag to indicate SPMD-Generic mode.
    assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
           "ExecMode is not an integer!");
    const int8_t ExecModeVal =
        cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
    if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
      return true;

    // We will now unconditionally modify the IR, indicate a change.
    Changed = ChangeStatus::CHANGED;

    // Wrap [RegionStartI, RegionEndI] so only thread 0 executes it; all other
    // threads wait at a barrier and afterwards read escaping values that
    // thread 0 broadcast through shared memory.
    auto CreateGuardedRegion = [&](Instruction *RegionStartI,
                                   Instruction *RegionEndI) {
      LoopInfo *LI = nullptr;
      DominatorTree *DT = nullptr;
      MemorySSAUpdater *MSU = nullptr;
      using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

      BasicBlock *ParentBB = RegionStartI->getParent();
      Function *Fn = ParentBB->getParent();
      Module &M = *Fn->getParent();

      // Create all the blocks and logic.
      // ParentBB:
      //    goto RegionCheckTidBB
      // RegionCheckTidBB:
      //    Tid = __kmpc_hardware_thread_id()
      //    if (Tid != 0)
      //        goto RegionBarrierBB
      // RegionStartBB:
      //    <execute instructions guarded>
      //    goto RegionEndBB
      // RegionEndBB:
      //    <store escaping values to shared mem>
      //    goto RegionBarrierBB
      //  RegionBarrierBB:
      //    __kmpc_simple_barrier_spmd()
      //    // second barrier is omitted if lacking escaping values.
      //    <load escaping values from shared mem>
      //    __kmpc_simple_barrier_spmd()
      //    goto RegionExitBB
      // RegionExitBB:
      //    <execute rest of instructions>

      BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
                                           DT, LI, MSU, "region.guarded.end");
      BasicBlock *RegionBarrierBB =
          SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
                     MSU, "region.barrier");
      BasicBlock *RegionExitBB =
          SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
                     DT, LI, MSU, "region.exit");
      BasicBlock *RegionStartBB =
          SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");

      assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
             "Expected a different CFG");

      BasicBlock *RegionCheckTidBB = SplitBlock(
          ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");

      // Register basic blocks with the Attributor.
      A.registerManifestAddedBasicBlock(*RegionEndBB);
      A.registerManifestAddedBasicBlock(*RegionBarrierBB);
      A.registerManifestAddedBasicBlock(*RegionExitBB);
      A.registerManifestAddedBasicBlock(*RegionStartBB);
      A.registerManifestAddedBasicBlock(*RegionCheckTidBB);

      bool HasBroadcastValues = false;
      // Find escaping outputs from the guarded region to outside users and
      // broadcast their values to them.
      for (Instruction &I : *RegionStartBB) {
        SmallPtrSet<Instruction *, 4> OutsideUsers;
        for (User *Usr : I.users()) {
          Instruction &UsrI = *cast<Instruction>(Usr);
          if (UsrI.getParent() != RegionStartBB)
            OutsideUsers.insert(&UsrI);
        }

        if (OutsideUsers.empty())
          continue;

        HasBroadcastValues = true;

        // Emit a global variable in shared memory to store the broadcasted
        // value.
        auto *SharedMem = new GlobalVariable(
            M, I.getType(), /* IsConstant */ false,
            GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
            sanitizeForGlobalName(
                (I.getName() + ".guarded.output.alloc").str()),
            nullptr, GlobalValue::NotThreadLocal,
            static_cast<unsigned>(AddressSpace::Shared));

        // Emit a store instruction to update the value.
        new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());

        LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
                                       I.getName() + ".guarded.output.load",
                                       RegionBarrierBB->getTerminator());

        // Emit a load instruction and replace uses of the output value.
        for (Instruction *UsrI : OutsideUsers)
          UsrI->replaceUsesOfWith(&I, LoadI);
      }

      auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

      // Go to tid check BB in ParentBB.
      const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
      ParentBB->getTerminator()->eraseFromParent();
      OpenMPIRBuilder::LocationDescription Loc(
          InsertPointTy(ParentBB, ParentBB->end()), DL);
      OMPInfoCache.OMPBuilder.updateToLocation(Loc);
      uint32_t SrcLocStrSize;
      auto *SrcLocStr =
          OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
      Value *Ident =
          OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
      BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);

      // Add check for Tid in RegionCheckTidBB
      RegionCheckTidBB->getTerminator()->eraseFromParent();
      OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
          InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
      OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
      FunctionCallee HardwareTidFn =
          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
              M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
      Value *Tid =
          OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
      // Only the thread with id zero executes the guarded region.
      Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
      OMPInfoCache.OMPBuilder.Builder
          .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
          ->setDebugLoc(DL);

      // First barrier for synchronization, ensures main thread has updated
      // values.
      FunctionCallee BarrierFn =
          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
              M, OMPRTL___kmpc_barrier_simple_spmd);
      OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
          RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
      OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
          ->setDebugLoc(DL);

      // Second barrier ensures workers have read broadcast values.
      if (HasBroadcastValues)
        CallInst::Create(BarrierFn, {Ident, Tid}, "",
                         RegionBarrierBB->getTerminator())
            ->setDebugLoc(DL);
    };

    // Walk each block that contains to-be-guarded instructions bottom-up and
    // queue moves that sink a user-free guarded effect down next to the
    // following effect. This packs guarded instructions together so fewer
    // (larger) guarded regions are needed. Calls to __kmpc_alloc_shared are
    // exempt from acting as reorder points.
    auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
    SmallPtrSet<BasicBlock *, 8> Visited;
    for (Instruction *GuardedI : SPMDCompatibilityTracker) {
      BasicBlock *BB = GuardedI->getParent();
      // Process each block only once.
      if (!Visited.insert(BB).second)
        continue;

      SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
      Instruction *LastEffect = nullptr;
      BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
      while (++IP != IPEnd) {
        // Only memory-touching instructions act as reorder barriers/targets.
        if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
          continue;
        Instruction *I = &*IP;
        if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
          continue;
        // An instruction with users, or one that does not need guarding,
        // ends the current reorderable run.
        if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
          LastEffect = nullptr;
          continue;
        }
        if (LastEffect)
          Reorders.push_back({I, LastEffect});
        LastEffect = &*IP;
      }
      // Apply the queued moves after the scan so iteration stays stable.
      for (auto &Reorder : Reorders)
        Reorder.first->moveBefore(Reorder.second);
    }

    SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;

    // Determine the maximal guarded regions: per block, each run of
    // consecutive instructions that need guarding becomes one
    // [start, end] pair.
    for (Instruction *GuardedI : SPMDCompatibilityTracker) {
      BasicBlock *BB = GuardedI->getParent();
      auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
          IRPosition::function(*GuardedI->getFunction()), nullptr,
          DepClassTy::NONE);
      assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
      auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
      // Continue if instruction is already guarded.
      if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
        continue;

      Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
      for (Instruction &I : *BB) {
        // If instruction I needs to be guarded update the guarded region
        // bounds.
        if (SPMDCompatibilityTracker.contains(&I)) {
          CalleeAAFunction.getGuardedInstructions().insert(&I);
          if (GuardedRegionStart)
            GuardedRegionEnd = &I;
          else
            GuardedRegionStart = GuardedRegionEnd = &I;

          continue;
        }

        // Instruction I does not need guarding, store
        // any region found and reset bounds.
        if (GuardedRegionStart) {
          GuardedRegions.push_back(
              std::make_pair(GuardedRegionStart, GuardedRegionEnd));
          GuardedRegionStart = nullptr;
          GuardedRegionEnd = nullptr;
        }
      }
    }

    for (auto &GR : GuardedRegions)
      CreateGuardedRegion(GR.first, GR.second);

    // Adjust the global exec mode flag that tells the runtime what mode this
    // kernel is executed in.
    assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
           "Initially non-SPMD kernel has SPMD exec mode!");
    ExecMode->setInitializer(
        ConstantInt::get(ExecMode->getInitializer()->getType(),
                         ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));

    // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
    const int InitModeArgNo = 1;
    const int DeinitModeArgNo = 1;
    const int InitUseStateMachineArgNo = 2;
    const int InitRequiresFullRuntimeArgNo = 3;
    const int DeinitRequiresFullRuntimeArgNo = 2;

    auto &Ctx = getAnchorValue().getContext();
    A.changeUseAfterManifest(
        KernelInitCB->getArgOperandUse(InitModeArgNo),
        *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
                                OMP_TGT_EXEC_MODE_SPMD));
    // SPMD-mode needs no generic state machine.
    A.changeUseAfterManifest(
        KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
        *ConstantInt::getBool(Ctx, false));
    A.changeUseAfterManifest(
        KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
        *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
                                OMP_TGT_EXEC_MODE_SPMD));
    // The full runtime is not required anymore either.
    A.changeUseAfterManifest(
        KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
        *ConstantInt::getBool(Ctx, false));
    A.changeUseAfterManifest(
        KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
        *ConstantInt::getBool(Ctx, false));

    ++NumOpenMPTargetRegionKernelsSPMD;

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "Transformed generic-mode kernel to SPMD-mode.";
    };
    A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
    return true;
  };
3376 | |||||
3377 | ChangeStatus buildCustomStateMachine(Attributor &A) { | ||||
3378 | // If we have disabled state machine rewrites, don't make a custom one | ||||
3379 | if (DisableOpenMPOptStateMachineRewrite) | ||||
3380 | return ChangeStatus::UNCHANGED; | ||||
3381 | |||||
3382 | // Don't rewrite the state machine if we are not in a valid state. | ||||
3383 | if (!ReachedKnownParallelRegions.isValidState()) | ||||
3384 | return ChangeStatus::UNCHANGED; | ||||
3385 | |||||
3386 | const int InitModeArgNo = 1; | ||||
3387 | const int InitUseStateMachineArgNo = 2; | ||||
3388 | |||||
3389 | // Check if the current configuration is non-SPMD and generic state machine. | ||||
3390 | // If we already have SPMD mode or a custom state machine we do not need to | ||||
3391 | // go any further. If it is anything but a constant something is weird and | ||||
3392 | // we give up. | ||||
3393 | ConstantInt *UseStateMachine = dyn_cast<ConstantInt>( | ||||
3394 | KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); | ||||
3395 | ConstantInt *Mode = | ||||
3396 | dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); | ||||
3397 | |||||
3398 | // If we are stuck with generic mode, try to create a custom device (=GPU) | ||||
3399 | // state machine which is specialized for the parallel regions that are | ||||
3400 | // reachable by the kernel. | ||||
3401 | if (!UseStateMachine || UseStateMachine->isZero() || !Mode || | ||||
3402 | (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) | ||||
3403 | return ChangeStatus::UNCHANGED; | ||||
3404 | |||||
3405 | // If not SPMD mode, indicate we use a custom state machine now. | ||||
3406 | auto &Ctx = getAnchorValue().getContext(); | ||||
3407 | auto *FalseVal = ConstantInt::getBool(Ctx, false); | ||||
3408 | A.changeUseAfterManifest( | ||||
3409 | KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); | ||||
3410 | |||||
3411 | // If we don't actually need a state machine we are done here. This can | ||||
3412 | // happen if there simply are no parallel regions. In the resulting kernel | ||||
3413 | // all worker threads will simply exit right away, leaving the main thread | ||||
3414 | // to do the work alone. | ||||
3415 | if (!mayContainParallelRegion()) { | ||||
3416 | ++NumOpenMPTargetRegionKernelsWithoutStateMachine; | ||||
3417 | |||||
3418 | auto Remark = [&](OptimizationRemark OR) { | ||||
3419 | return OR << "Removing unused state machine from generic-mode kernel."; | ||||
3420 | }; | ||||
3421 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark); | ||||
3422 | |||||
3423 | return ChangeStatus::CHANGED; | ||||
3424 | } | ||||
3425 | |||||
3426 | // Keep track in the statistics of our new shiny custom state machine. | ||||
3427 | if (ReachedUnknownParallelRegions.empty()) { | ||||
3428 | ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback; | ||||
3429 | |||||
3430 | auto Remark = [&](OptimizationRemark OR) { | ||||
3431 | return OR << "Rewriting generic-mode kernel with a customized state " | ||||
3432 | "machine."; | ||||
3433 | }; | ||||
3434 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark); | ||||
3435 | } else { | ||||
3436 | ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback; | ||||
3437 | |||||
3438 | auto Remark = [&](OptimizationRemarkAnalysis OR) { | ||||
3439 | return OR << "Generic-mode kernel is executed with a customized state " | ||||
3440 | "machine that requires a fallback."; | ||||
3441 | }; | ||||
3442 | A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark); | ||||
3443 | |||||
3444 | // Tell the user why we ended up with a fallback. | ||||
3445 | for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) { | ||||
3446 | if (!UnknownParallelRegionCB) | ||||
3447 | continue; | ||||
3448 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | ||||
3449 | return ORA << "Call may contain unknown parallel regions. Use " | ||||
3450 | << "`__attribute__((assume(\"omp_no_parallelism\")))` to " | ||||
3451 | "override."; | ||||
3452 | }; | ||||
3453 | A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB, | ||||
3454 | "OMP133", Remark); | ||||
3455 | } | ||||
3456 | } | ||||
3457 | |||||
3458 | // Create all the blocks: | ||||
3459 | // | ||||
3460 | // InitCB = __kmpc_target_init(...) | ||||
3461 | // BlockHwSize = | ||||
3462 | // __kmpc_get_hardware_num_threads_in_block(); | ||||
3463 | // WarpSize = __kmpc_get_warp_size(); | ||||
3464 | // BlockSize = BlockHwSize - WarpSize; | ||||
3465 | // if (InitCB >= BlockSize) return; | ||||
3466 | // IsWorkerCheckBB: bool IsWorker = InitCB >= 0; | ||||
3467 | // if (IsWorker) { | ||||
3468 | // SMBeginBB: __kmpc_barrier_simple_generic(...); | ||||
3469 | // void *WorkFn; | ||||
3470 | // bool Active = __kmpc_kernel_parallel(&WorkFn); | ||||
3471 | // if (!WorkFn) return; | ||||
3472 | // SMIsActiveCheckBB: if (Active) { | ||||
3473 | // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>) | ||||
3474 | // ParFn0(...); | ||||
3475 | // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>) | ||||
3476 | // ParFn1(...); | ||||
3477 | // ... | ||||
3478 | // SMIfCascadeCurrentBB: else | ||||
3479 | // ((WorkFnTy*)WorkFn)(...); | ||||
3480 | // SMEndParallelBB: __kmpc_kernel_end_parallel(...); | ||||
3481 | // } | ||||
3482 | // SMDoneBB: __kmpc_barrier_simple_generic(...); | ||||
3483 | // goto SMBeginBB; | ||||
3484 | // } | ||||
3485 | // UserCodeEntryBB: // user code | ||||
3486 | // __kmpc_target_deinit(...) | ||||
3487 | // | ||||
3488 | Function *Kernel = getAssociatedFunction(); | ||||
3489 | assert(Kernel && "Expected an associated function!")(static_cast <bool> (Kernel && "Expected an associated function!" ) ? void (0) : __assert_fail ("Kernel && \"Expected an associated function!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 3489, __extension__ __PRETTY_FUNCTION__)); | ||||
3490 | |||||
3491 | BasicBlock *InitBB = KernelInitCB->getParent(); | ||||
3492 | BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock( | ||||
3493 | KernelInitCB->getNextNode(), "thread.user_code.check"); | ||||
3494 | BasicBlock *IsWorkerCheckBB = | ||||
3495 | BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB); | ||||
3496 | BasicBlock *StateMachineBeginBB = BasicBlock::Create( | ||||
3497 | Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB); | ||||
3498 | BasicBlock *StateMachineFinishedBB = BasicBlock::Create( | ||||
3499 | Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB); | ||||
3500 | BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create( | ||||
3501 | Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB); | ||||
3502 | BasicBlock *StateMachineIfCascadeCurrentBB = | ||||
3503 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", | ||||
3504 | Kernel, UserCodeEntryBB); | ||||
3505 | BasicBlock *StateMachineEndParallelBB = | ||||
3506 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end", | ||||
3507 | Kernel, UserCodeEntryBB); | ||||
3508 | BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create( | ||||
3509 | Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB); | ||||
3510 | A.registerManifestAddedBasicBlock(*InitBB); | ||||
3511 | A.registerManifestAddedBasicBlock(*UserCodeEntryBB); | ||||
3512 | A.registerManifestAddedBasicBlock(*IsWorkerCheckBB); | ||||
3513 | A.registerManifestAddedBasicBlock(*StateMachineBeginBB); | ||||
3514 | A.registerManifestAddedBasicBlock(*StateMachineFinishedBB); | ||||
3515 | A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB); | ||||
3516 | A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB); | ||||
3517 | A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB); | ||||
3518 | A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB); | ||||
3519 | |||||
3520 | const DebugLoc &DLoc = KernelInitCB->getDebugLoc(); | ||||
3521 | ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); | ||||
3522 | InitBB->getTerminator()->eraseFromParent(); | ||||
3523 | |||||
3524 | Module &M = *Kernel->getParent(); | ||||
3525 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
3526 | FunctionCallee BlockHwSizeFn = | ||||
3527 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | ||||
3528 | M, OMPRTL___kmpc_get_hardware_num_threads_in_block); | ||||
3529 | FunctionCallee WarpSizeFn = | ||||
3530 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | ||||
3531 | M, OMPRTL___kmpc_get_warp_size); | ||||
3532 | Instruction *BlockHwSize = | ||||
3533 | CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB); | ||||
3534 | BlockHwSize->setDebugLoc(DLoc); | ||||
3535 | Instruction *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB); | ||||
3536 | WarpSize->setDebugLoc(DLoc); | ||||
3537 | Instruction *BlockSize = | ||||
3538 | BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB); | ||||
3539 | BlockSize->setDebugLoc(DLoc); | ||||
3540 | Instruction *IsMainOrWorker = | ||||
3541 | ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, | ||||
3542 | BlockSize, "thread.is_main_or_worker", InitBB); | ||||
3543 | IsMainOrWorker->setDebugLoc(DLoc); | ||||
3544 | BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker, | ||||
3545 | InitBB); | ||||
3546 | |||||
3547 | Instruction *IsWorker = | ||||
3548 | ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, | ||||
3549 | ConstantInt::get(KernelInitCB->getType(), -1), | ||||
3550 | "thread.is_worker", IsWorkerCheckBB); | ||||
3551 | IsWorker->setDebugLoc(DLoc); | ||||
3552 | BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, | ||||
3553 | IsWorkerCheckBB); | ||||
3554 | |||||
3555 | // Create local storage for the work function pointer. | ||||
3556 | const DataLayout &DL = M.getDataLayout(); | ||||
3557 | Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); | ||||
3558 | Instruction *WorkFnAI = | ||||
3559 | new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, | ||||
3560 | "worker.work_fn.addr", &Kernel->getEntryBlock().front()); | ||||
3561 | WorkFnAI->setDebugLoc(DLoc); | ||||
3562 | |||||
3563 | OMPInfoCache.OMPBuilder.updateToLocation( | ||||
3564 | OpenMPIRBuilder::LocationDescription( | ||||
3565 | IRBuilder<>::InsertPoint(StateMachineBeginBB, | ||||
3566 | StateMachineBeginBB->end()), | ||||
3567 | DLoc)); | ||||
3568 | |||||
3569 | Value *Ident = KernelInitCB->getArgOperand(0); | ||||
3570 | Value *GTid = KernelInitCB; | ||||
3571 | |||||
3572 | FunctionCallee BarrierFn = | ||||
3573 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | ||||
3574 | M, OMPRTL___kmpc_barrier_simple_generic); | ||||
3575 | CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB) | ||||
3576 | ->setDebugLoc(DLoc); | ||||
3577 | |||||
3578 | if (WorkFnAI->getType()->getPointerAddressSpace() != | ||||
3579 | (unsigned int)AddressSpace::Generic) { | ||||
3580 | WorkFnAI = new AddrSpaceCastInst( | ||||
3581 | WorkFnAI, | ||||
3582 | PointerType::getWithSamePointeeType( | ||||
3583 | cast<PointerType>(WorkFnAI->getType()), | ||||
3584 | (unsigned int)AddressSpace::Generic), | ||||
3585 | WorkFnAI->getName() + ".generic", StateMachineBeginBB); | ||||
3586 | WorkFnAI->setDebugLoc(DLoc); | ||||
3587 | } | ||||
3588 | |||||
3589 | FunctionCallee KernelParallelFn = | ||||
3590 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | ||||
3591 | M, OMPRTL___kmpc_kernel_parallel); | ||||
3592 | Instruction *IsActiveWorker = CallInst::Create( | ||||
3593 | KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB); | ||||
3594 | IsActiveWorker->setDebugLoc(DLoc); | ||||
3595 | Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn", | ||||
3596 | StateMachineBeginBB); | ||||
3597 | WorkFn->setDebugLoc(DLoc); | ||||
3598 | |||||
3599 | FunctionType *ParallelRegionFnTy = FunctionType::get( | ||||
3600 | Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)}, | ||||
3601 | false); | ||||
3602 | Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( | ||||
3603 | WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast", | ||||
3604 | StateMachineBeginBB); | ||||
3605 | |||||
3606 | Instruction *IsDone = | ||||
3607 | ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, | ||||
3608 | Constant::getNullValue(VoidPtrTy), "worker.is_done", | ||||
3609 | StateMachineBeginBB); | ||||
3610 | IsDone->setDebugLoc(DLoc); | ||||
3611 | BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB, | ||||
3612 | IsDone, StateMachineBeginBB) | ||||
3613 | ->setDebugLoc(DLoc); | ||||
3614 | |||||
3615 | BranchInst::Create(StateMachineIfCascadeCurrentBB, | ||||
3616 | StateMachineDoneBarrierBB, IsActiveWorker, | ||||
3617 | StateMachineIsActiveCheckBB) | ||||
3618 | ->setDebugLoc(DLoc); | ||||
3619 | |||||
3620 | Value *ZeroArg = | ||||
3621 | Constant::getNullValue(ParallelRegionFnTy->getParamType(0)); | ||||
3622 | |||||
3623 | // Now that we have most of the CFG skeleton it is time for the if-cascade | ||||
3624 | // that checks the function pointer we got from the runtime against the | ||||
3625 | // parallel regions we expect, if there are any. | ||||
3626 | for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) { | ||||
3627 | auto *ParallelRegion = ReachedKnownParallelRegions[I]; | ||||
3628 | BasicBlock *PRExecuteBB = BasicBlock::Create( | ||||
3629 | Ctx, "worker_state_machine.parallel_region.execute", Kernel, | ||||
3630 | StateMachineEndParallelBB); | ||||
3631 | CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB) | ||||
3632 | ->setDebugLoc(DLoc); | ||||
3633 | BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB) | ||||
3634 | ->setDebugLoc(DLoc); | ||||
3635 | |||||
3636 | BasicBlock *PRNextBB = | ||||
3637 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", | ||||
3638 | Kernel, StateMachineEndParallelBB); | ||||
3639 | |||||
3640 | // Check if we need to compare the pointer at all or if we can just | ||||
3641 | // call the parallel region function. | ||||
3642 | Value *IsPR; | ||||
3643 | if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) { | ||||
3644 | Instruction *CmpI = ICmpInst::Create( | ||||
3645 | ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion, | ||||
3646 | "worker.check_parallel_region", StateMachineIfCascadeCurrentBB); | ||||
3647 | CmpI->setDebugLoc(DLoc); | ||||
3648 | IsPR = CmpI; | ||||
3649 | } else { | ||||
3650 | IsPR = ConstantInt::getTrue(Ctx); | ||||
3651 | } | ||||
3652 | |||||
3653 | BranchInst::Create(PRExecuteBB, PRNextBB, IsPR, | ||||
3654 | StateMachineIfCascadeCurrentBB) | ||||
3655 | ->setDebugLoc(DLoc); | ||||
3656 | StateMachineIfCascadeCurrentBB = PRNextBB; | ||||
3657 | } | ||||
3658 | |||||
3659 | // At the end of the if-cascade we place the indirect function pointer call | ||||
3660 | // in case we might need it, that is if there can be parallel regions we | ||||
3661 | // have not handled in the if-cascade above. | ||||
3662 | if (!ReachedUnknownParallelRegions.empty()) { | ||||
3663 | StateMachineIfCascadeCurrentBB->setName( | ||||
3664 | "worker_state_machine.parallel_region.fallback.execute"); | ||||
3665 | CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "", | ||||
3666 | StateMachineIfCascadeCurrentBB) | ||||
3667 | ->setDebugLoc(DLoc); | ||||
3668 | } | ||||
3669 | BranchInst::Create(StateMachineEndParallelBB, | ||||
3670 | StateMachineIfCascadeCurrentBB) | ||||
3671 | ->setDebugLoc(DLoc); | ||||
3672 | |||||
3673 | CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | ||||
3674 | M, OMPRTL___kmpc_kernel_end_parallel), | ||||
3675 | {}, "", StateMachineEndParallelBB) | ||||
3676 | ->setDebugLoc(DLoc); | ||||
3677 | BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB) | ||||
3678 | ->setDebugLoc(DLoc); | ||||
3679 | |||||
3680 | CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB) | ||||
3681 | ->setDebugLoc(DLoc); | ||||
3682 | BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB) | ||||
3683 | ->setDebugLoc(DLoc); | ||||
3684 | |||||
3685 | return ChangeStatus::CHANGED; | ||||
3686 | } | ||||
3687 | |||||
  /// Fixpoint iteration update function. Will be called every time a dependence
  /// changed its state (and in the beginning).
  ChangeStatus updateImpl(Attributor &A) override {
    // Snapshot the state so we can report at the end whether this update
    // changed anything (this drives the Attributor's fixpoint iteration).
    KernelInfoState StateBefore = getState();

    // Callback to check a read/write instruction. Returning true continues
    // the traversal; stores that are not provably thread-private are recorded
    // in SPMDCompatibilityTracker so they can be guarded later.
    auto CheckRWInst = [&](Instruction &I) {
      // We handle calls later.
      if (isa<CallBase>(I))
        return true;
      // We only care about write effects.
      if (!I.mayWriteToMemory())
        return true;
      if (auto *SI = dyn_cast<StoreInst>(&I)) {
        SmallVector<const Value *> Objects;
        getUnderlyingObjects(SI->getPointerOperand(), Objects);
        // Stores into stack objects are thread-private and never need
        // guarding.
        if (llvm::all_of(Objects,
                         [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
          return true;
        // Check for AAHeapToStack moved objects which must not be guarded.
        auto &HS = A.getAAFor<AAHeapToStack>(
            *this, IRPosition::function(*I.getFunction()),
            DepClassTy::OPTIONAL);
        if (llvm::all_of(Objects, [&HS](const Value *Obj) {
              auto *CB = dyn_cast<CallBase>(Obj);
              if (!CB)
                return false;
              return HS.isAssumedHeapToStack(*CB);
            })) {
          return true;
        }
      }

      // Insert instruction that needs guarding.
      SPMDCompatibilityTracker.insert(&I);
      return true;
    };

    bool UsedAssumedInformationInCheckRWInst = false;
    // Only re-scan while the SPMD-compatibility state is still in flux.
    if (!SPMDCompatibilityTracker.isAtFixpoint())
      if (!A.checkForAllReadWriteInstructions(
              CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
        SPMDCompatibilityTracker.indicatePessimisticFixpoint();

    bool UsedAssumedInformationFromReachingKernels = false;
    if (!IsKernelEntry) {
      updateParallelLevels(A);

      bool AllReachingKernelsKnown = true;
      updateReachingKernelEntries(A, AllReachingKernelsKnown);
      UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;

      if (!ParallelLevels.isValidState())
        SPMDCompatibilityTracker.indicatePessimisticFixpoint();
      else if (!ReachingKernelEntries.isValidState())
        SPMDCompatibilityTracker.indicatePessimisticFixpoint();
      else if (!SPMDCompatibilityTracker.empty()) {
        // Check if all reaching kernels agree on the mode as we can otherwise
        // not guard instructions. We might not be sure about the mode so we
        // cannot fix the internal spmd-zation state either.
        int SPMD = 0, Generic = 0;
        for (auto *Kernel : ReachingKernelEntries) {
          auto &CBAA = A.getAAFor<AAKernelInfo>(
              *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
          if (CBAA.SPMDCompatibilityTracker.isValidState() &&
              CBAA.SPMDCompatibilityTracker.isAssumed())
            ++SPMD;
          else
            ++Generic;
          if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint())
            UsedAssumedInformationFromReachingKernels = true;
        }
        // Mixed SPMD/generic reaching kernels: guarding is impossible.
        if (SPMD != 0 && Generic != 0)
          SPMDCompatibilityTracker.indicatePessimisticFixpoint();
      }
    }

    // Callback to check a call instruction. Merges the callee's
    // KernelInfoState into ours and tracks whether all callee states were
    // already at a fixpoint.
    bool AllParallelRegionStatesWereFixed = true;
    bool AllSPMDStatesWereFixed = true;
    auto CheckCallInst = [&](Instruction &I) {
      auto &CB = cast<CallBase>(I);
      auto &CBAA = A.getAAFor<AAKernelInfo>(
          *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
      getState() ^= CBAA.getState();
      AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
      AllParallelRegionStatesWereFixed &=
          CBAA.ReachedKnownParallelRegions.isAtFixpoint();
      AllParallelRegionStatesWereFixed &=
          CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
      return true;
    };

    bool UsedAssumedInformationInCheckCallInst = false;
    if (!A.checkForAllCallLikeInstructions(
            CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
      LLVM_DEBUG(dbgs() << TAG
                        << "Failed to visit all call-like instructions!\n";);
      return indicatePessimisticFixpoint();
    }

    // If we haven't used any assumed information for the reached parallel
    // region states we can fix it.
    if (!UsedAssumedInformationInCheckCallInst &&
        AllParallelRegionStatesWereFixed) {
      ReachedKnownParallelRegions.indicateOptimisticFixpoint();
      ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
    }

    // If we are sure there are no parallel regions in the kernel we do not
    // want SPMD mode.
    if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
        ReachedKnownParallelRegions.isAtFixpoint() &&
        ReachedUnknownParallelRegions.isValidState() &&
        ReachedKnownParallelRegions.isValidState() &&
        !mayContainParallelRegion())
      SPMDCompatibilityTracker.indicatePessimisticFixpoint();

    // If we haven't used any assumed information for the SPMD state we can fix
    // it.
    if (!UsedAssumedInformationInCheckRWInst &&
        !UsedAssumedInformationInCheckCallInst &&
        !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
      SPMDCompatibilityTracker.indicateOptimisticFixpoint();

    return StateBefore == getState() ? ChangeStatus::UNCHANGED
                                     : ChangeStatus::CHANGED;
  }
3816 | |||||
3817 | private: | ||||
3818 | /// Update info regarding reaching kernels. | ||||
3819 | void updateReachingKernelEntries(Attributor &A, | ||||
3820 | bool &AllReachingKernelsKnown) { | ||||
3821 | auto PredCallSite = [&](AbstractCallSite ACS) { | ||||
3822 | Function *Caller = ACS.getInstruction()->getFunction(); | ||||
3823 | |||||
3824 | assert(Caller && "Caller is nullptr")(static_cast <bool> (Caller && "Caller is nullptr" ) ? void (0) : __assert_fail ("Caller && \"Caller is nullptr\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 3824, __extension__ __PRETTY_FUNCTION__)); | ||||
3825 | |||||
3826 | auto &CAA = A.getOrCreateAAFor<AAKernelInfo>( | ||||
3827 | IRPosition::function(*Caller), this, DepClassTy::REQUIRED); | ||||
3828 | if (CAA.ReachingKernelEntries.isValidState()) { | ||||
3829 | ReachingKernelEntries ^= CAA.ReachingKernelEntries; | ||||
3830 | return true; | ||||
3831 | } | ||||
3832 | |||||
3833 | // We lost track of the caller of the associated function, any kernel | ||||
3834 | // could reach now. | ||||
3835 | ReachingKernelEntries.indicatePessimisticFixpoint(); | ||||
3836 | |||||
3837 | return true; | ||||
3838 | }; | ||||
3839 | |||||
3840 | if (!A.checkForAllCallSites(PredCallSite, *this, | ||||
3841 | true /* RequireAllCallSites */, | ||||
3842 | AllReachingKernelsKnown)) | ||||
3843 | ReachingKernelEntries.indicatePessimisticFixpoint(); | ||||
3844 | } | ||||
3845 | |||||
3846 | /// Update info regarding parallel levels. | ||||
3847 | void updateParallelLevels(Attributor &A) { | ||||
3848 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
3849 | OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI = | ||||
3850 | OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; | ||||
3851 | |||||
3852 | auto PredCallSite = [&](AbstractCallSite ACS) { | ||||
3853 | Function *Caller = ACS.getInstruction()->getFunction(); | ||||
3854 | |||||
3855 | assert(Caller && "Caller is nullptr")(static_cast <bool> (Caller && "Caller is nullptr" ) ? void (0) : __assert_fail ("Caller && \"Caller is nullptr\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 3855, __extension__ __PRETTY_FUNCTION__)); | ||||
3856 | |||||
3857 | auto &CAA = | ||||
3858 | A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller)); | ||||
3859 | if (CAA.ParallelLevels.isValidState()) { | ||||
3860 | // Any function that is called by `__kmpc_parallel_51` will not be | ||||
3861 | // folded as the parallel level in the function is updated. In order to | ||||
3862 | // get it right, all the analysis would depend on the implentation. That | ||||
3863 | // said, if in the future any change to the implementation, the analysis | ||||
3864 | // could be wrong. As a consequence, we are just conservative here. | ||||
3865 | if (Caller == Parallel51RFI.Declaration) { | ||||
3866 | ParallelLevels.indicatePessimisticFixpoint(); | ||||
3867 | return true; | ||||
3868 | } | ||||
3869 | |||||
3870 | ParallelLevels ^= CAA.ParallelLevels; | ||||
3871 | |||||
3872 | return true; | ||||
3873 | } | ||||
3874 | |||||
3875 | // We lost track of the caller of the associated function, any kernel | ||||
3876 | // could reach now. | ||||
3877 | ParallelLevels.indicatePessimisticFixpoint(); | ||||
3878 | |||||
3879 | return true; | ||||
3880 | }; | ||||
3881 | |||||
3882 | bool AllCallSitesKnown = true; | ||||
3883 | if (!A.checkForAllCallSites(PredCallSite, *this, | ||||
3884 | true /* RequireAllCallSites */, | ||||
3885 | AllCallSitesKnown)) | ||||
3886 | ParallelLevels.indicatePessimisticFixpoint(); | ||||
3887 | } | ||||
3888 | }; | ||||
3889 | |||||
3890 | /// The call site kernel info abstract attribute, basically, what can we say | ||||
3891 | /// about a call site with regards to the KernelInfoState. For now this simply | ||||
3892 | /// forwards the information from the callee. | ||||
3893 | struct AAKernelInfoCallSite : AAKernelInfo { | ||||
3894 | AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A) | ||||
3895 | : AAKernelInfo(IRP, A) {} | ||||
3896 | |||||
3897 | /// See AbstractAttribute::initialize(...). | ||||
3898 | void initialize(Attributor &A) override { | ||||
3899 | AAKernelInfo::initialize(A); | ||||
3900 | |||||
3901 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | ||||
3902 | Function *Callee = getAssociatedFunction(); | ||||
3903 | |||||
3904 | auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>( | ||||
3905 | *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); | ||||
3906 | |||||
3907 | // Check for SPMD-mode assumptions. | ||||
3908 | if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) { | ||||
3909 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | ||||
3910 | indicateOptimisticFixpoint(); | ||||
3911 | } | ||||
3912 | |||||
3913 | // First weed out calls we do not care about, that is readonly/readnone | ||||
3914 | // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a | ||||
3915 | // parallel region or anything else we are looking for. | ||||
3916 | if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) { | ||||
3917 | indicateOptimisticFixpoint(); | ||||
3918 | return; | ||||
3919 | } | ||||
3920 | |||||
3921 | // Next we check if we know the callee. If it is a known OpenMP function | ||||
3922 | // we will handle them explicitly in the switch below. If it is not, we | ||||
3923 | // will use an AAKernelInfo object on the callee to gather information and | ||||
3924 | // merge that into the current state. The latter happens in the updateImpl. | ||||
3925 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
3926 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); | ||||
3927 | if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { | ||||
3928 | // Unknown caller or declarations are not analyzable, we give up. | ||||
3929 | if (!Callee || !A.isFunctionIPOAmendable(*Callee)) { | ||||
3930 | |||||
3931 | // Unknown callees might contain parallel regions, except if they have | ||||
3932 | // an appropriate assumption attached. | ||||
3933 | if (!(AssumptionAA.hasAssumption("omp_no_openmp") || | ||||
3934 | AssumptionAA.hasAssumption("omp_no_parallelism"))) | ||||
3935 | ReachedUnknownParallelRegions.insert(&CB); | ||||
3936 | |||||
3937 | // If SPMDCompatibilityTracker is not fixed, we need to give up on the | ||||
3938 | // idea we can run something unknown in SPMD-mode. | ||||
3939 | if (!SPMDCompatibilityTracker.isAtFixpoint()) { | ||||
3940 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | ||||
3941 | SPMDCompatibilityTracker.insert(&CB); | ||||
3942 | } | ||||
3943 | |||||
3944 | // We have updated the state for this unknown call properly, there won't | ||||
3945 | // be any change so we indicate a fixpoint. | ||||
3946 | indicateOptimisticFixpoint(); | ||||
3947 | } | ||||
3948 | // If the callee is known and can be used in IPO, we will update the state | ||||
3949 | // based on the callee state in updateImpl. | ||||
3950 | return; | ||||
3951 | } | ||||
3952 | |||||
3953 | const unsigned int WrapperFunctionArgNo = 6; | ||||
3954 | RuntimeFunction RF = It->getSecond(); | ||||
3955 | switch (RF) { | ||||
3956 | // All the functions we know are compatible with SPMD mode. | ||||
3957 | case OMPRTL___kmpc_is_spmd_exec_mode: | ||||
3958 | case OMPRTL___kmpc_distribute_static_fini: | ||||
3959 | case OMPRTL___kmpc_for_static_fini: | ||||
3960 | case OMPRTL___kmpc_global_thread_num: | ||||
3961 | case OMPRTL___kmpc_get_hardware_num_threads_in_block: | ||||
3962 | case OMPRTL___kmpc_get_hardware_num_blocks: | ||||
3963 | case OMPRTL___kmpc_single: | ||||
3964 | case OMPRTL___kmpc_end_single: | ||||
3965 | case OMPRTL___kmpc_master: | ||||
3966 | case OMPRTL___kmpc_end_master: | ||||
3967 | case OMPRTL___kmpc_barrier: | ||||
3968 | case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: | ||||
3969 | case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: | ||||
3970 | case OMPRTL___kmpc_nvptx_end_reduce_nowait: | ||||
3971 | break; | ||||
3972 | case OMPRTL___kmpc_distribute_static_init_4: | ||||
3973 | case OMPRTL___kmpc_distribute_static_init_4u: | ||||
3974 | case OMPRTL___kmpc_distribute_static_init_8: | ||||
3975 | case OMPRTL___kmpc_distribute_static_init_8u: | ||||
3976 | case OMPRTL___kmpc_for_static_init_4: | ||||
3977 | case OMPRTL___kmpc_for_static_init_4u: | ||||
3978 | case OMPRTL___kmpc_for_static_init_8: | ||||
3979 | case OMPRTL___kmpc_for_static_init_8u: { | ||||
3980 | // Check the schedule and allow static schedule in SPMD mode. | ||||
3981 | unsigned ScheduleArgOpNo = 2; | ||||
3982 | auto *ScheduleTypeCI = | ||||
3983 | dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo)); | ||||
3984 | unsigned ScheduleTypeVal = | ||||
3985 | ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0; | ||||
3986 | switch (OMPScheduleType(ScheduleTypeVal)) { | ||||
3987 | case OMPScheduleType::Static: | ||||
3988 | case OMPScheduleType::StaticChunked: | ||||
3989 | case OMPScheduleType::Distribute: | ||||
3990 | case OMPScheduleType::DistributeChunked: | ||||
3991 | break; | ||||
3992 | default: | ||||
3993 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | ||||
3994 | SPMDCompatibilityTracker.insert(&CB); | ||||
3995 | break; | ||||
3996 | }; | ||||
3997 | } break; | ||||
3998 | case OMPRTL___kmpc_target_init: | ||||
3999 | KernelInitCB = &CB; | ||||
4000 | break; | ||||
4001 | case OMPRTL___kmpc_target_deinit: | ||||
4002 | KernelDeinitCB = &CB; | ||||
4003 | break; | ||||
4004 | case OMPRTL___kmpc_parallel_51: | ||||
4005 | if (auto *ParallelRegion = dyn_cast<Function>( | ||||
4006 | CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) { | ||||
4007 | ReachedKnownParallelRegions.insert(ParallelRegion); | ||||
4008 | break; | ||||
4009 | } | ||||
4010 | // The condition above should usually get the parallel region function | ||||
4011 | // pointer and record it. In the off chance it doesn't we assume the | ||||
4012 | // worst. | ||||
4013 | ReachedUnknownParallelRegions.insert(&CB); | ||||
4014 | break; | ||||
4015 | case OMPRTL___kmpc_omp_task: | ||||
4016 | // We do not look into tasks right now, just give up. | ||||
4017 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | ||||
4018 | SPMDCompatibilityTracker.insert(&CB); | ||||
4019 | ReachedUnknownParallelRegions.insert(&CB); | ||||
4020 | break; | ||||
4021 | case OMPRTL___kmpc_alloc_shared: | ||||
4022 | case OMPRTL___kmpc_free_shared: | ||||
4023 | // Return without setting a fixpoint, to be resolved in updateImpl. | ||||
4024 | return; | ||||
4025 | default: | ||||
4026 | // Unknown OpenMP runtime calls cannot be executed in SPMD-mode, | ||||
4027 | // generally. However, they do not hide parallel regions. | ||||
4028 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | ||||
4029 | SPMDCompatibilityTracker.insert(&CB); | ||||
4030 | break; | ||||
4031 | } | ||||
4032 | // All other OpenMP runtime calls will not reach parallel regions so they | ||||
4033 | // can be safely ignored for now. Since it is a known OpenMP runtime call we | ||||
4034 | // have now modeled all effects and there is no need for any update. | ||||
4035 | indicateOptimisticFixpoint(); | ||||
4036 | } | ||||
4037 | |||||
4038 | ChangeStatus updateImpl(Attributor &A) override { | ||||
4039 | // TODO: Once we have call site specific value information we can provide | ||||
4040 | // call site specific liveness information and then it makes | ||||
4041 | // sense to specialize attributes for call sites arguments instead of | ||||
4042 | // redirecting requests to the callee argument. | ||||
4043 | Function *F = getAssociatedFunction(); | ||||
4044 | |||||
4045 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | ||||
4046 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F); | ||||
4047 | |||||
4048 | // If F is not a runtime function, propagate the AAKernelInfo of the callee. | ||||
4049 | if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { | ||||
4050 | const IRPosition &FnPos = IRPosition::function(*F); | ||||
4051 | auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED); | ||||
4052 | if (getState() == FnAA.getState()) | ||||
4053 | return ChangeStatus::UNCHANGED; | ||||
4054 | getState() = FnAA.getState(); | ||||
4055 | return ChangeStatus::CHANGED; | ||||
4056 | } | ||||
4057 | |||||
4058 | // F is a runtime function that allocates or frees memory, check | ||||
4059 | // AAHeapToStack and AAHeapToShared. | ||||
4060 | KernelInfoState StateBefore = getState(); | ||||
4061 | assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call" ) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4063, __extension__ __PRETTY_FUNCTION__)) | ||||
4062 | It->getSecond() == OMPRTL___kmpc_free_shared) &&(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call" ) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4063, __extension__ __PRETTY_FUNCTION__)) | ||||
4063 | "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call")(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call" ) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4063, __extension__ __PRETTY_FUNCTION__)); | ||||
4064 | |||||
4065 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | ||||
4066 | |||||
4067 | auto &HeapToStackAA = A.getAAFor<AAHeapToStack>( | ||||
4068 | *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); | ||||
4069 | auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>( | ||||
4070 | *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); | ||||
4071 | |||||
4072 | RuntimeFunction RF = It->getSecond(); | ||||
4073 | |||||
4074 | switch (RF) { | ||||
4075 | // If neither HeapToStack nor HeapToShared assume the call is removed, | ||||
4076 | // assume SPMD incompatibility. | ||||
4077 | case OMPRTL___kmpc_alloc_shared: | ||||
4078 | if (!HeapToStackAA.isAssumedHeapToStack(CB) && | ||||
4079 | !HeapToSharedAA.isAssumedHeapToShared(CB)) | ||||
4080 | SPMDCompatibilityTracker.insert(&CB); | ||||
4081 | break; | ||||
4082 | case OMPRTL___kmpc_free_shared: | ||||
4083 | if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) && | ||||
4084 | !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB)) | ||||
4085 | SPMDCompatibilityTracker.insert(&CB); | ||||
4086 | break; | ||||
4087 | default: | ||||
4088 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | ||||
4089 | SPMDCompatibilityTracker.insert(&CB); | ||||
4090 | } | ||||
4091 | |||||
4092 | return StateBefore == getState() ? ChangeStatus::UNCHANGED | ||||
4093 | : ChangeStatus::CHANGED; | ||||
4094 | } | ||||
4095 | }; | ||||
4096 | |||||
/// Abstract attribute interface used to fold known OpenMP runtime calls to a
/// simplified (typically constant) value at their call sites.
struct AAFoldRuntimeCall
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;

  AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Statistics are tracked as part of manifest for now.
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
                                              Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAFoldRuntimeCall"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAFoldRuntimeCall
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID for this attribute class (identified by its address).
  static const char ID;
};
4124 | |||||
4125 | struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { | ||||
4126 | AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A) | ||||
4127 | : AAFoldRuntimeCall(IRP, A) {} | ||||
4128 | |||||
4129 | /// See AbstractAttribute::getAsStr() | ||||
4130 | const std::string getAsStr() const override { | ||||
4131 | if (!isValidState()) | ||||
4132 | return "<invalid>"; | ||||
4133 | |||||
4134 | std::string Str("simplified value: "); | ||||
4135 | |||||
4136 | if (!SimplifiedValue.hasValue()) | ||||
4137 | return Str + std::string("none"); | ||||
4138 | |||||
4139 | if (!SimplifiedValue.getValue()) | ||||
4140 | return Str + std::string("nullptr"); | ||||
4141 | |||||
4142 | if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue())) | ||||
4143 | return Str + std::to_string(CI->getSExtValue()); | ||||
4144 | |||||
4145 | return Str + std::string("unknown"); | ||||
4146 | } | ||||
4147 | |||||
  /// See AbstractAttribute::initialize(...). Determines which runtime call is
  /// being folded and registers a simplification callback so the Attributor
  /// can query the (assumed) folded value.
  void initialize(Attributor &A) override {
    // Folding can be disabled from the command line for debugging.
    if (DisableOpenMPOptFolding)
      indicatePessimisticFixpoint();

    Function *Callee = getAssociatedFunction();

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
    assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&
           "Expected a known OpenMP runtime function");

    // Remember which runtime call this is; updateImpl dispatches on it.
    RFKind = It->getSecond();

    CallBase &CB = cast<CallBase>(getAssociatedValue());
    A.registerSimplificationCallback(
        IRPosition::callsite_returned(CB),
        [&](const IRPosition &IRP, const AbstractAttribute *AA,
            bool &UsedAssumedInformation) -> Optional<Value *> {
          assert((isValidState() || (SimplifiedValue.hasValue() &&
                                     SimplifiedValue.getValue() == nullptr)) &&
                 "Unexpected invalid state!");

          // Before the fixpoint is reached the simplified value is only
          // assumed, so flag it and record a dependence on the querying AA.
          if (!isAtFixpoint()) {
            UsedAssumedInformation = true;
            if (AA)
              A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
          }
          return SimplifiedValue;
        });
  }
4178 | |||||
4179 | ChangeStatus updateImpl(Attributor &A) override { | ||||
4180 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | ||||
4181 | switch (RFKind) { | ||||
4182 | case OMPRTL___kmpc_is_spmd_exec_mode: | ||||
4183 | Changed |= foldIsSPMDExecMode(A); | ||||
4184 | break; | ||||
4185 | case OMPRTL___kmpc_is_generic_main_thread_id: | ||||
4186 | Changed |= foldIsGenericMainThread(A); | ||||
4187 | break; | ||||
4188 | case OMPRTL___kmpc_parallel_level: | ||||
4189 | Changed |= foldParallelLevel(A); | ||||
4190 | break; | ||||
4191 | case OMPRTL___kmpc_get_hardware_num_threads_in_block: | ||||
4192 | Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit"); | ||||
4193 | break; | ||||
4194 | case OMPRTL___kmpc_get_hardware_num_blocks: | ||||
4195 | Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams"); | ||||
4196 | break; | ||||
4197 | default: | ||||
4198 | llvm_unreachable("Unhandled OpenMP runtime function!")::llvm::llvm_unreachable_internal("Unhandled OpenMP runtime function!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4198); | ||||
4199 | } | ||||
4200 | |||||
4201 | return Changed; | ||||
4202 | } | ||||
4203 | |||||
  /// See AbstractAttribute::manifest(...). If a non-null simplified value was
  /// determined, replace the runtime call's uses with it and delete the call.
  ChangeStatus manifest(Attributor &A) override {
    ChangeStatus Changed = ChangeStatus::UNCHANGED;

    // A null SimplifiedValue means "do not replace"; only manifest when a
    // concrete replacement value was computed.
    if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
      Instruction &I = *getCtxI();
      A.changeValueAfterManifest(I, **SimplifiedValue);
      A.deleteAfterManifest(I);

      CallBase *CB = dyn_cast<CallBase>(&I);
      auto Remark = [&](OptimizationRemark OR) {
        // NOTE(review): CB->getCalledFunction() is assumed non-null here —
        // this attribute is only created for direct calls to known runtime
        // functions; confirm an indirect call can never reach this remark.
        if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
          return OR << "Replacing OpenMP runtime call "
                    << CB->getCalledFunction()->getName() << " with "
                    << ore::NV("FoldedValue", C->getZExtValue()) << ".";
        return OR << "Replacing OpenMP runtime call "
                  << CB->getCalledFunction()->getName() << ".";
      };

      if (CB && EnableVerboseRemarks)
        A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);

      LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
                        << **SimplifiedValue << "\n");

      Changed = ChangeStatus::CHANGED;
    }

    return Changed;
  }
4233 | |||||
  /// See AbstractAttribute::indicatePessimisticFixpoint(...).
  ChangeStatus indicatePessimisticFixpoint() override {
    // Record that the call site is known not to be foldable: a contained
    // nullptr (as opposed to an empty Optional) signals "give up" to
    // manifest() and the simplification callback.
    SimplifiedValue = nullptr;
    return AAFoldRuntimeCall::indicatePessimisticFixpoint();
  }
4238 | |||||
4239 | private: | ||||
4240 | /// Fold __kmpc_is_spmd_exec_mode into a constant if possible. | ||||
4241 | ChangeStatus foldIsSPMDExecMode(Attributor &A) { | ||||
4242 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | ||||
4243 | |||||
4244 | unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | ||||
4245 | unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | ||||
4246 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | ||||
4247 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | ||||
4248 | |||||
4249 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | ||||
4250 | return indicatePessimisticFixpoint(); | ||||
4251 | |||||
4252 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | ||||
4253 | auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | ||||
4254 | DepClassTy::REQUIRED); | ||||
4255 | |||||
4256 | if (!AA.isValidState()) { | ||||
4257 | SimplifiedValue = nullptr; | ||||
4258 | return indicatePessimisticFixpoint(); | ||||
4259 | } | ||||
4260 | |||||
4261 | if (AA.SPMDCompatibilityTracker.isAssumed()) { | ||||
4262 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | ||||
4263 | ++KnownSPMDCount; | ||||
4264 | else | ||||
4265 | ++AssumedSPMDCount; | ||||
4266 | } else { | ||||
4267 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | ||||
4268 | ++KnownNonSPMDCount; | ||||
4269 | else | ||||
4270 | ++AssumedNonSPMDCount; | ||||
4271 | } | ||||
4272 | } | ||||
4273 | |||||
4274 | if ((AssumedSPMDCount + KnownSPMDCount) && | ||||
4275 | (AssumedNonSPMDCount + KnownNonSPMDCount)) | ||||
4276 | return indicatePessimisticFixpoint(); | ||||
4277 | |||||
4278 | auto &Ctx = getAnchorValue().getContext(); | ||||
4279 | if (KnownSPMDCount || AssumedSPMDCount) { | ||||
4280 | assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4281, __extension__ __PRETTY_FUNCTION__)) | ||||
4281 | "Expected only SPMD kernels!")(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4281, __extension__ __PRETTY_FUNCTION__)); | ||||
4282 | // All reaching kernels are in SPMD mode. Update all function calls to | ||||
4283 | // __kmpc_is_spmd_exec_mode to 1. | ||||
4284 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); | ||||
4285 | } else if (KnownNonSPMDCount || AssumedNonSPMDCount) { | ||||
4286 | assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4287, __extension__ __PRETTY_FUNCTION__)) | ||||
4287 | "Expected only non-SPMD kernels!")(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4287, __extension__ __PRETTY_FUNCTION__)); | ||||
4288 | // All reaching kernels are in non-SPMD mode. Update all function | ||||
4289 | // calls to __kmpc_is_spmd_exec_mode to 0. | ||||
4290 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); | ||||
4291 | } else { | ||||
4292 | // We have empty reaching kernels, therefore we cannot tell if the | ||||
4293 | // associated call site can be folded. At this moment, SimplifiedValue | ||||
4294 | // must be none. | ||||
4295 | assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none")(static_cast <bool> (!SimplifiedValue.hasValue() && "SimplifiedValue should be none") ? void (0) : __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should be none\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4295, __extension__ __PRETTY_FUNCTION__)); | ||||
4296 | } | ||||
4297 | |||||
4298 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | ||||
4299 | : ChangeStatus::CHANGED; | ||||
4300 | } | ||||
4301 | |||||
4302 | /// Fold __kmpc_is_generic_main_thread_id into a constant if possible. | ||||
4303 | ChangeStatus foldIsGenericMainThread(Attributor &A) { | ||||
4304 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | ||||
4305 | |||||
4306 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | ||||
4307 | Function *F = CB.getFunction(); | ||||
4308 | const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( | ||||
4309 | *this, IRPosition::function(*F), DepClassTy::REQUIRED); | ||||
4310 | |||||
4311 | if (!ExecutionDomainAA.isValidState()) | ||||
4312 | return indicatePessimisticFixpoint(); | ||||
4313 | |||||
4314 | auto &Ctx = getAnchorValue().getContext(); | ||||
4315 | if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB)) | ||||
4316 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); | ||||
4317 | else | ||||
4318 | return indicatePessimisticFixpoint(); | ||||
4319 | |||||
4320 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | ||||
4321 | : ChangeStatus::CHANGED; | ||||
4322 | } | ||||
4323 | |||||
4324 | /// Fold __kmpc_parallel_level into a constant if possible. | ||||
4325 | ChangeStatus foldParallelLevel(Attributor &A) { | ||||
4326 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | ||||
4327 | |||||
4328 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | ||||
4329 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | ||||
4330 | |||||
4331 | if (!CallerKernelInfoAA.ParallelLevels.isValidState()) | ||||
4332 | return indicatePessimisticFixpoint(); | ||||
4333 | |||||
4334 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | ||||
4335 | return indicatePessimisticFixpoint(); | ||||
4336 | |||||
4337 | if (CallerKernelInfoAA.ReachingKernelEntries.empty()) { | ||||
4338 | assert(!SimplifiedValue.hasValue() &&(static_cast <bool> (!SimplifiedValue.hasValue() && "SimplifiedValue should keep none at this point") ? void (0) : __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should keep none at this point\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4339, __extension__ __PRETTY_FUNCTION__)) | ||||
4339 | "SimplifiedValue should keep none at this point")(static_cast <bool> (!SimplifiedValue.hasValue() && "SimplifiedValue should keep none at this point") ? void (0) : __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should keep none at this point\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4339, __extension__ __PRETTY_FUNCTION__)); | ||||
4340 | return ChangeStatus::UNCHANGED; | ||||
4341 | } | ||||
4342 | |||||
4343 | unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | ||||
4344 | unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | ||||
4345 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | ||||
4346 | auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | ||||
4347 | DepClassTy::REQUIRED); | ||||
4348 | if (!AA.SPMDCompatibilityTracker.isValidState()) | ||||
4349 | return indicatePessimisticFixpoint(); | ||||
4350 | |||||
4351 | if (AA.SPMDCompatibilityTracker.isAssumed()) { | ||||
4352 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | ||||
4353 | ++KnownSPMDCount; | ||||
4354 | else | ||||
4355 | ++AssumedSPMDCount; | ||||
4356 | } else { | ||||
4357 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | ||||
4358 | ++KnownNonSPMDCount; | ||||
4359 | else | ||||
4360 | ++AssumedNonSPMDCount; | ||||
4361 | } | ||||
4362 | } | ||||
4363 | |||||
4364 | if ((AssumedSPMDCount + KnownSPMDCount) && | ||||
4365 | (AssumedNonSPMDCount + KnownNonSPMDCount)) | ||||
4366 | return indicatePessimisticFixpoint(); | ||||
4367 | |||||
4368 | auto &Ctx = getAnchorValue().getContext(); | ||||
4369 | // If the caller can only be reached by SPMD kernel entries, the parallel | ||||
4370 | // level is 1. Similarly, if the caller can only be reached by non-SPMD | ||||
4371 | // kernel entries, it is 0. | ||||
4372 | if (AssumedSPMDCount || KnownSPMDCount) { | ||||
4373 | assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4374, __extension__ __PRETTY_FUNCTION__)) | ||||
4374 | "Expected only SPMD kernels!")(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4374, __extension__ __PRETTY_FUNCTION__)); | ||||
4375 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); | ||||
4376 | } else { | ||||
4377 | assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4378, __extension__ __PRETTY_FUNCTION__)) | ||||
4378 | "Expected only non-SPMD kernels!")(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4378, __extension__ __PRETTY_FUNCTION__)); | ||||
4379 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0); | ||||
4380 | } | ||||
4381 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | ||||
4382 | : ChangeStatus::CHANGED; | ||||
4383 | } | ||||
4384 | |||||
4385 | ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) { | ||||
4386 | // Specialize only if all the calls agree with the attribute constant value | ||||
4387 | int32_t CurrentAttrValue = -1; | ||||
4388 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | ||||
4389 | |||||
4390 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | ||||
4391 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | ||||
4392 | |||||
4393 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | ||||
4394 | return indicatePessimisticFixpoint(); | ||||
4395 | |||||
4396 | // Iterate over the kernels that reach this function | ||||
4397 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | ||||
4398 | int32_t NextAttrVal = -1; | ||||
4399 | if (K->hasFnAttribute(Attr)) | ||||
4400 | NextAttrVal = | ||||
4401 | std::stoi(K->getFnAttribute(Attr).getValueAsString().str()); | ||||
4402 | |||||
4403 | if (NextAttrVal == -1 || | ||||
4404 | (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal)) | ||||
4405 | return indicatePessimisticFixpoint(); | ||||
4406 | CurrentAttrValue = NextAttrVal; | ||||
4407 | } | ||||
4408 | |||||
4409 | if (CurrentAttrValue != -1) { | ||||
4410 | auto &Ctx = getAnchorValue().getContext(); | ||||
4411 | SimplifiedValue = | ||||
4412 | ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue); | ||||
4413 | } | ||||
4414 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | ||||
4415 | : ChangeStatus::CHANGED; | ||||
4416 | } | ||||
4417 | |||||
  /// An optional value the associated value is assumed to fold to. That is, we
  /// assume the associated value (which is a call) can be replaced by this
  /// simplified value. An empty Optional means no assumption has been made
  /// yet; a contained nullptr means the call is known not to be foldable
  /// (set by indicatePessimisticFixpoint).
  Optional<Value *> SimplifiedValue;

  /// The runtime function kind of the callee of the associated call site.
  RuntimeFunction RFKind;
4425 | }; | ||||
4426 | |||||
4427 | } // namespace | ||||
4428 | |||||
4429 | /// Register folding callsite | ||||
4430 | void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { | ||||
4431 | auto &RFI = OMPInfoCache.RFIs[RF]; | ||||
4432 | RFI.foreachUse(SCC, [&](Use &U, Function &F) { | ||||
4433 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI); | ||||
4434 | if (!CI) | ||||
4435 | return false; | ||||
4436 | A.getOrCreateAAFor<AAFoldRuntimeCall>( | ||||
4437 | IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr, | ||||
4438 | DepClassTy::NONE, /* ForceUpdate */ false, | ||||
4439 | /* UpdateAfterInit */ false); | ||||
4440 | return false; | ||||
4441 | }); | ||||
4442 | } | ||||
4443 | |||||
4444 | void OpenMPOpt::registerAAs(bool IsModulePass) { | ||||
4445 | if (SCC.empty()) | ||||
4446 | |||||
4447 | return; | ||||
4448 | if (IsModulePass) { | ||||
4449 | // Ensure we create the AAKernelInfo AAs first and without triggering an | ||||
4450 | // update. This will make sure we register all value simplification | ||||
4451 | // callbacks before any other AA has the chance to create an AAValueSimplify | ||||
4452 | // or similar. | ||||
4453 | for (Function *Kernel : OMPInfoCache.Kernels) | ||||
4454 | A.getOrCreateAAFor<AAKernelInfo>( | ||||
4455 | IRPosition::function(*Kernel), /* QueryingAA */ nullptr, | ||||
4456 | DepClassTy::NONE, /* ForceUpdate */ false, | ||||
4457 | /* UpdateAfterInit */ false); | ||||
4458 | |||||
4459 | registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id); | ||||
4460 | registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); | ||||
4461 | registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level); | ||||
4462 | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block); | ||||
4463 | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks); | ||||
4464 | } | ||||
4465 | |||||
4466 | // Create CallSite AA for all Getters. | ||||
4467 | for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { | ||||
4468 | auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; | ||||
4469 | |||||
4470 | auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; | ||||
4471 | |||||
4472 | auto CreateAA = [&](Use &U, Function &Caller) { | ||||
4473 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); | ||||
4474 | if (!CI) | ||||
4475 | return false; | ||||
4476 | |||||
4477 | auto &CB = cast<CallBase>(*CI); | ||||
4478 | |||||
4479 | IRPosition CBPos = IRPosition::callsite_function(CB); | ||||
4480 | A.getOrCreateAAFor<AAICVTracker>(CBPos); | ||||
4481 | return false; | ||||
4482 | }; | ||||
4483 | |||||
4484 | GetterRFI.foreachUse(SCC, CreateAA); | ||||
4485 | } | ||||
4486 | auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | ||||
4487 | auto CreateAA = [&](Use &U, Function &F) { | ||||
4488 | A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F)); | ||||
4489 | return false; | ||||
4490 | }; | ||||
4491 | if (!DisableOpenMPOptDeglobalization) | ||||
4492 | GlobalizationRFI.foreachUse(SCC, CreateAA); | ||||
4493 | |||||
4494 | // Create an ExecutionDomain AA for every function and a HeapToStack AA for | ||||
4495 | // every function if there is a device kernel. | ||||
4496 | if (!isOpenMPDevice(M)) | ||||
4497 | return; | ||||
4498 | |||||
4499 | for (auto *F : SCC) { | ||||
4500 | if (F->isDeclaration()) | ||||
4501 | continue; | ||||
4502 | |||||
4503 | A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F)); | ||||
4504 | if (!DisableOpenMPOptDeglobalization) | ||||
4505 | A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F)); | ||||
4506 | |||||
4507 | for (auto &I : instructions(*F)) { | ||||
4508 | if (auto *LI = dyn_cast<LoadInst>(&I)) { | ||||
4509 | bool UsedAssumedInformation = false; | ||||
4510 | A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, | ||||
4511 | UsedAssumedInformation); | ||||
4512 | } else if (auto *SI = dyn_cast<StoreInst>(&I)) { | ||||
4513 | A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI)); | ||||
4514 | } | ||||
4515 | } | ||||
4516 | } | ||||
4517 | } | ||||
4518 | |||||
// Definitions of the per-class identifier statics. Only the address of each
// ID is meaningful; it serves as the unique key the Attributor framework
// uses to distinguish abstract attribute kinds.
const char AAICVTracker::ID = 0;
const char AAKernelInfo::ID = 0;
const char AAExecutionDomain::ID = 0;
const char AAHeapToShared::ID = 0;
const char AAFoldRuntimeCall::ID = 0;
4524 | |||||
4525 | AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, | ||||
4526 | Attributor &A) { | ||||
4527 | AAICVTracker *AA = nullptr; | ||||
4528 | switch (IRP.getPositionKind()) { | ||||
4529 | case IRPosition::IRP_INVALID: | ||||
4530 | case IRPosition::IRP_FLOAT: | ||||
4531 | case IRPosition::IRP_ARGUMENT: | ||||
4532 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||
4533 | llvm_unreachable("ICVTracker can only be created for function position!")::llvm::llvm_unreachable_internal("ICVTracker can only be created for function position!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4533); | ||||
4534 | case IRPosition::IRP_RETURNED: | ||||
4535 | AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); | ||||
4536 | break; | ||||
4537 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||
4538 | AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); | ||||
4539 | break; | ||||
4540 | case IRPosition::IRP_CALL_SITE: | ||||
4541 | AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); | ||||
4542 | break; | ||||
4543 | case IRPosition::IRP_FUNCTION: | ||||
4544 | AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); | ||||
4545 | break; | ||||
4546 | } | ||||
4547 | |||||
4548 | return *AA; | ||||
4549 | } | ||||
4550 | |||||
4551 | AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, | ||||
4552 | Attributor &A) { | ||||
4553 | AAExecutionDomainFunction *AA = nullptr; | ||||
4554 | switch (IRP.getPositionKind()) { | ||||
4555 | case IRPosition::IRP_INVALID: | ||||
4556 | case IRPosition::IRP_FLOAT: | ||||
4557 | case IRPosition::IRP_ARGUMENT: | ||||
4558 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||
4559 | case IRPosition::IRP_RETURNED: | ||||
4560 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||
4561 | case IRPosition::IRP_CALL_SITE: | ||||
4562 | llvm_unreachable(::llvm::llvm_unreachable_internal("AAExecutionDomain can only be created for function position!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4563) | ||||
4563 | "AAExecutionDomain can only be created for function position!")::llvm::llvm_unreachable_internal("AAExecutionDomain can only be created for function position!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4563); | ||||
4564 | case IRPosition::IRP_FUNCTION: | ||||
4565 | AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); | ||||
4566 | break; | ||||
4567 | } | ||||
4568 | |||||
4569 | return *AA; | ||||
4570 | } | ||||
4571 | |||||
4572 | AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, | ||||
4573 | Attributor &A) { | ||||
4574 | AAHeapToSharedFunction *AA = nullptr; | ||||
4575 | switch (IRP.getPositionKind()) { | ||||
4576 | case IRPosition::IRP_INVALID: | ||||
4577 | case IRPosition::IRP_FLOAT: | ||||
4578 | case IRPosition::IRP_ARGUMENT: | ||||
4579 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||
4580 | case IRPosition::IRP_RETURNED: | ||||
4581 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||
4582 | case IRPosition::IRP_CALL_SITE: | ||||
4583 | llvm_unreachable(::llvm::llvm_unreachable_internal("AAHeapToShared can only be created for function position!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4584) | ||||
4584 | "AAHeapToShared can only be created for function position!")::llvm::llvm_unreachable_internal("AAHeapToShared can only be created for function position!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4584); | ||||
4585 | case IRPosition::IRP_FUNCTION: | ||||
4586 | AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); | ||||
4587 | break; | ||||
4588 | } | ||||
4589 | |||||
4590 | return *AA; | ||||
4591 | } | ||||
4592 | |||||
4593 | AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP, | ||||
4594 | Attributor &A) { | ||||
4595 | AAKernelInfo *AA = nullptr; | ||||
4596 | switch (IRP.getPositionKind()) { | ||||
4597 | case IRPosition::IRP_INVALID: | ||||
4598 | case IRPosition::IRP_FLOAT: | ||||
4599 | case IRPosition::IRP_ARGUMENT: | ||||
4600 | case IRPosition::IRP_RETURNED: | ||||
4601 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||
4602 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||
4603 | llvm_unreachable("KernelInfo can only be created for function position!")::llvm::llvm_unreachable_internal("KernelInfo can only be created for function position!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4603); | ||||
4604 | case IRPosition::IRP_CALL_SITE: | ||||
4605 | AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A); | ||||
4606 | break; | ||||
4607 | case IRPosition::IRP_FUNCTION: | ||||
4608 | AA = new (A.Allocator) AAKernelInfoFunction(IRP, A); | ||||
4609 | break; | ||||
4610 | } | ||||
4611 | |||||
4612 | return *AA; | ||||
4613 | } | ||||
4614 | |||||
4615 | AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP, | ||||
4616 | Attributor &A) { | ||||
4617 | AAFoldRuntimeCall *AA = nullptr; | ||||
4618 | switch (IRP.getPositionKind()) { | ||||
4619 | case IRPosition::IRP_INVALID: | ||||
4620 | case IRPosition::IRP_FLOAT: | ||||
4621 | case IRPosition::IRP_ARGUMENT: | ||||
4622 | case IRPosition::IRP_RETURNED: | ||||
4623 | case IRPosition::IRP_FUNCTION: | ||||
4624 | case IRPosition::IRP_CALL_SITE: | ||||
4625 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||
4626 | llvm_unreachable("KernelInfo can only be created for call site position!")::llvm::llvm_unreachable_internal("KernelInfo can only be created for call site position!" , "llvm/lib/Transforms/IPO/OpenMPOpt.cpp", 4626); | ||||
4627 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||
4628 | AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A); | ||||
4629 | break; | ||||
4630 | } | ||||
4631 | |||||
4632 | return *AA; | ||||
4633 | } | ||||
4634 | |||||
4635 | PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { | ||||
4636 | if (!containsOpenMP(M)) | ||||
4637 | return PreservedAnalyses::all(); | ||||
4638 | if (DisableOpenMPOptimizations) | ||||
4639 | return PreservedAnalyses::all(); | ||||
4640 | |||||
4641 | FunctionAnalysisManager &FAM = | ||||
4642 | AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | ||||
4643 | KernelSet Kernels = getDeviceKernels(M); | ||||
4644 | |||||
4645 | auto IsCalled = [&](Function &F) { | ||||
4646 | if (Kernels.contains(&F)) | ||||
4647 | return true; | ||||
4648 | for (const User *U : F.users()) | ||||
4649 | if (!isa<BlockAddress>(U)) | ||||
4650 | return true; | ||||
4651 | return false; | ||||
4652 | }; | ||||
4653 | |||||
4654 | auto EmitRemark = [&](Function &F) { | ||||
4655 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); | ||||
4656 | ORE.emit([&]() { | ||||
4657 | OptimizationRemarkAnalysis ORA(DEBUG_TYPE"openmp-opt", "OMP140", &F); | ||||
4658 | return ORA << "Could not internalize function. " | ||||
4659 | << "Some optimizations may not be possible. [OMP140]"; | ||||
4660 | }); | ||||
4661 | }; | ||||
4662 | |||||
4663 | // Create internal copies of each function if this is a kernel Module. This | ||||
4664 | // allows iterprocedural passes to see every call edge. | ||||
4665 | DenseMap<Function *, Function *> InternalizedMap; | ||||
4666 | if (isOpenMPDevice(M)) { | ||||
4667 | SmallPtrSet<Function *, 16> InternalizeFns; | ||||
4668 | for (Function &F : M) | ||||
4669 | if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && | ||||
4670 | !DisableInternalization) { | ||||
4671 | if (Attributor::isInternalizable(F)) { | ||||
4672 | InternalizeFns.insert(&F); | ||||
4673 | } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { | ||||
4674 | EmitRemark(F); | ||||
4675 | } | ||||
4676 | } | ||||
4677 | |||||
4678 | Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); | ||||
4679 | } | ||||
4680 | |||||
4681 | // Look at every function in the Module unless it was internalized. | ||||
4682 | SmallVector<Function *, 16> SCC; | ||||
4683 | for (Function &F : M) | ||||
4684 | if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) | ||||
4685 | SCC.push_back(&F); | ||||
4686 | |||||
4687 | if (SCC.empty()) | ||||
4688 | return PreservedAnalyses::all(); | ||||
4689 | |||||
4690 | AnalysisGetter AG(FAM); | ||||
4691 | |||||
4692 | auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { | ||||
4693 | return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); | ||||
4694 | }; | ||||
4695 | |||||
4696 | BumpPtrAllocator Allocator; | ||||
4697 | CallGraphUpdater CGUpdater; | ||||
4698 | |||||
4699 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | ||||
4700 | OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); | ||||
4701 | |||||
4702 | unsigned MaxFixpointIterations = | ||||
4703 | (isOpenMPDevice(M)) ? SetFixpointIterations : 32; | ||||
4704 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, | ||||
4705 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | ||||
4706 | |||||
4707 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | ||||
4708 | bool Changed = OMPOpt.run(true); | ||||
4709 | |||||
4710 | // Optionally inline device functions for potentially better performance. | ||||
4711 | if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M)) | ||||
4712 | for (Function &F : M) | ||||
4713 | if (!F.isDeclaration() && !Kernels.contains(&F) && | ||||
4714 | !F.hasFnAttribute(Attribute::NoInline)) | ||||
4715 | F.addFnAttr(Attribute::AlwaysInline); | ||||
4716 | |||||
4717 | if (PrintModuleAfterOptimizations) | ||||
4718 | LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M; } } while (false); | ||||
4719 | |||||
4720 | if (Changed) | ||||
4721 | return PreservedAnalyses::none(); | ||||
4722 | |||||
4723 | return PreservedAnalyses::all(); | ||||
4724 | } | ||||
4725 | |||||
4726 | PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, | ||||
4727 | CGSCCAnalysisManager &AM, | ||||
4728 | LazyCallGraph &CG, | ||||
4729 | CGSCCUpdateResult &UR) { | ||||
4730 | if (!containsOpenMP(*C.begin()->getFunction().getParent())) | ||||
4731 | return PreservedAnalyses::all(); | ||||
4732 | if (DisableOpenMPOptimizations) | ||||
4733 | return PreservedAnalyses::all(); | ||||
4734 | |||||
4735 | SmallVector<Function *, 16> SCC; | ||||
4736 | // If there are kernels in the module, we have to run on all SCC's. | ||||
4737 | for (LazyCallGraph::Node &N : C) { | ||||
4738 | Function *Fn = &N.getFunction(); | ||||
4739 | SCC.push_back(Fn); | ||||
4740 | } | ||||
4741 | |||||
4742 | if (SCC.empty()) | ||||
4743 | return PreservedAnalyses::all(); | ||||
4744 | |||||
4745 | Module &M = *C.begin()->getFunction().getParent(); | ||||
4746 | |||||
4747 | KernelSet Kernels = getDeviceKernels(M); | ||||
4748 | |||||
4749 | FunctionAnalysisManager &FAM = | ||||
4750 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); | ||||
4751 | |||||
4752 | AnalysisGetter AG(FAM); | ||||
4753 | |||||
4754 | auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { | ||||
4755 | return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); | ||||
4756 | }; | ||||
4757 | |||||
4758 | BumpPtrAllocator Allocator; | ||||
4759 | CallGraphUpdater CGUpdater; | ||||
4760 | CGUpdater.initialize(CG, C, AM, UR); | ||||
4761 | |||||
4762 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | ||||
4763 | OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, | ||||
4764 | /*CGSCC*/ Functions, Kernels); | ||||
4765 | |||||
4766 | unsigned MaxFixpointIterations = | ||||
4767 | (isOpenMPDevice(M)) ? SetFixpointIterations : 32; | ||||
4768 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, | ||||
4769 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | ||||
4770 | |||||
4771 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | ||||
4772 | bool Changed = OMPOpt.run(false); | ||||
4773 | |||||
4774 | if (PrintModuleAfterOptimizations) | ||||
4775 | LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M; } } while (false); | ||||
4776 | |||||
4777 | if (Changed) | ||||
4778 | return PreservedAnalyses::none(); | ||||
4779 | |||||
4780 | return PreservedAnalyses::all(); | ||||
4781 | } | ||||
4782 | |||||
namespace {

/// Legacy pass manager wrapper that runs the OpenMP-specific optimizations
/// (see OpenMPOpt) on each call graph SCC.
struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
  /// Keeps the legacy call graph in sync with IR changes performed by
  /// OpenMPOpt / the Attributor.
  CallGraphUpdater CGUpdater;
  static char ID;

  OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
    initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  bool runOnSCC(CallGraphSCC &CGSCC) override {
    // Bail early if the module was not compiled with OpenMP (module flag),
    // the optimizations are disabled, or the SCC is to be skipped.
    if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
      return false;
    if (DisableOpenMPOptimizations || skipSCC(CGSCC))
      return false;

    // Collect only defined functions of the SCC; declarations have no IR to
    // optimize.
    SmallVector<Function *, 16> SCC;
    // If there are kernels in the module, we have to run on all SCC's.
    for (CallGraphNode *CGN : CGSCC) {
      Function *Fn = CGN->getFunction();
      if (!Fn || Fn->isDeclaration())
        continue;
      SCC.push_back(Fn);
    }

    if (SCC.empty())
      return false;

    Module &M = CGSCC.getCallGraph().getModule();
    KernelSet Kernels = getDeviceKernels(M);

    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
    CGUpdater.initialize(CG, CGSCC);

    // Maintain a map of functions to avoid rebuilding the ORE
    DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
    auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
      std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
      if (!ORE)
        ORE = std::make_unique<OptimizationRemarkEmitter>(F);
      return *ORE;
    };

    AnalysisGetter AG;
    SetVector<Function *> Functions(SCC.begin(), SCC.end());
    BumpPtrAllocator Allocator;
    // The information cache is built for the whole module but scoped to the
    // functions of this SCC.
    OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
                                  Allocator,
                                  /*CGSCC*/ Functions, Kernels);

    // Device modules honor the user-configurable iteration limit; host
    // modules use a fixed cap of 32 fixpoint iterations.
    unsigned MaxFixpointIterations =
        (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
    Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
                 MaxFixpointIterations, OREGetter, DEBUG_TYPE);

    OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
    bool Result = OMPOpt.run(false);

    if (PrintModuleAfterOptimizations)
      LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);

    return Result;
  }

  bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
};

} // end anonymous namespace
4855 | |||||
4856 | KernelSet llvm::omp::getDeviceKernels(Module &M) { | ||||
4857 | // TODO: Create a more cross-platform way of determining device kernels. | ||||
4858 | NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); | ||||
4859 | KernelSet Kernels; | ||||
4860 | |||||
4861 | if (!MD) | ||||
4862 | return Kernels; | ||||
4863 | |||||
4864 | for (auto *Op : MD->operands()) { | ||||
4865 | if (Op->getNumOperands() < 2) | ||||
4866 | continue; | ||||
4867 | MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); | ||||
4868 | if (!KindID || KindID->getString() != "kernel") | ||||
4869 | continue; | ||||
4870 | |||||
4871 | Function *KernelFn = | ||||
4872 | mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); | ||||
4873 | if (!KernelFn) | ||||
4874 | continue; | ||||
4875 | |||||
4876 | ++NumOpenMPTargetRegionKernels; | ||||
4877 | |||||
4878 | Kernels.insert(KernelFn); | ||||
4879 | } | ||||
4880 | |||||
4881 | return Kernels; | ||||
4882 | } | ||||
4883 | |||||
4884 | bool llvm::omp::containsOpenMP(Module &M) { | ||||
4885 | Metadata *MD = M.getModuleFlag("openmp"); | ||||
4886 | if (!MD) | ||||
4887 | return false; | ||||
4888 | |||||
4889 | return true; | ||||
4890 | } | ||||
4891 | |||||
4892 | bool llvm::omp::isOpenMPDevice(Module &M) { | ||||
4893 | Metadata *MD = M.getModuleFlag("openmp-device"); | ||||
4894 | if (!MD) | ||||
4895 | return false; | ||||
4896 | |||||
4897 | return true; | ||||
4898 | } | ||||
4899 | |||||
// Pass identification token used by the legacy pass manager.
char OpenMPOptCGSCCLegacyPass::ID = 0;

// Register the legacy CGSCC pass under "openmp-opt-cgscc" and declare its
// dependence on the call graph analysis.
INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
                      "OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
                    "OpenMP specific optimizations", false, false)

/// Factory used by the legacy pass manager to instantiate the pass.
Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
  return new OpenMPOptCGSCCLegacyPass();
}
1 | //===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // Attributor: An inter procedural (abstract) "attribute" deduction framework. | ||||||||
10 | // | ||||||||
11 | // The Attributor framework is an inter procedural abstract analysis (fixpoint | ||||||||
12 | // iteration analysis). The goal is to allow easy deduction of new attributes as | ||||||||
13 | // well as information exchange between abstract attributes in-flight. | ||||||||
14 | // | ||||||||
15 | // The Attributor class is the driver and the link between the various abstract | ||||||||
16 | // attributes. The Attributor will iterate until a fixpoint state is reached by | ||||||||
17 | // all abstract attributes in-flight, or until it will enforce a pessimistic fix | ||||||||
18 | // point because an iteration limit is reached. | ||||||||
19 | // | ||||||||
20 | // Abstract attributes, derived from the AbstractAttribute class, actually | ||||||||
21 | // describe properties of the code. They can correspond to actual LLVM-IR | ||||||||
22 | // attributes, or they can be more general, ultimately unrelated to LLVM-IR | ||||||||
// attributes. The latter is useful when an abstract attribute provides
24 | // information to other abstract attributes in-flight but we might not want to | ||||||||
25 | // manifest the information. The Attributor allows to query in-flight abstract | ||||||||
26 | // attributes through the `Attributor::getAAFor` method (see the method | ||||||||
27 | // description for an example). If the method is used by an abstract attribute | ||||||||
28 | // P, and it results in an abstract attribute Q, the Attributor will | ||||||||
29 | // automatically capture a potential dependence from Q to P. This dependence | ||||||||
30 | // will cause P to be reevaluated whenever Q changes in the future. | ||||||||
31 | // | ||||||||
32 | // The Attributor will only reevaluate abstract attributes that might have | ||||||||
// changed since the last iteration. That means that the Attributor will not
34 | // revisit all instructions/blocks/functions in the module but only query | ||||||||
35 | // an update from a subset of the abstract attributes. | ||||||||
36 | // | ||||||||
37 | // The update method `AbstractAttribute::updateImpl` is implemented by the | ||||||||
38 | // specific "abstract attribute" subclasses. The method is invoked whenever the | ||||||||
39 | // currently assumed state (see the AbstractState class) might not be valid | ||||||||
40 | // anymore. This can, for example, happen if the state was dependent on another | ||||||||
41 | // abstract attribute that changed. In every invocation, the update method has | ||||||||
42 | // to adjust the internal state of an abstract attribute to a point that is | ||||||||
43 | // justifiable by the underlying IR and the current state of abstract attributes | ||||||||
44 | // in-flight. Since the IR is given and assumed to be valid, the information | ||||||||
45 | // derived from it can be assumed to hold. However, information derived from | ||||||||
46 | // other abstract attributes is conditional on various things. If the justifying | ||||||||
47 | // state changed, the `updateImpl` has to revisit the situation and potentially | ||||||||
48 | // find another justification or limit the optimistic assumes made. | ||||||||
49 | // | ||||||||
50 | // Change is the key in this framework. Until a state of no-change, thus a | ||||||||
51 | // fixpoint, is reached, the Attributor will query the abstract attributes | ||||||||
52 | // in-flight to re-evaluate their state. If the (current) state is too | ||||||||
53 | // optimistic, hence it cannot be justified anymore through other abstract | ||||||||
54 | // attributes or the state of the IR, the state of the abstract attribute will | ||||||||
55 | // have to change. Generally, we assume abstract attribute state to be a finite | ||||||||
56 | // height lattice and the update function to be monotone. However, these | ||||||||
57 | // conditions are not enforced because the iteration limit will guarantee | ||||||||
58 | // termination. If an optimistic fixpoint is reached, or a pessimistic fix | ||||||||
59 | // point is enforced after a timeout, the abstract attributes are tasked to | ||||||||
60 | // manifest their result in the IR for passes to come. | ||||||||
61 | // | ||||||||
62 | // Attribute manifestation is not mandatory. If desired, there is support to | ||||||||
63 | // generate a single or multiple LLVM-IR attributes already in the helper struct | ||||||||
64 | // IRAttribute. In the simplest case, a subclass inherits from IRAttribute with | ||||||||
65 | // a proper Attribute::AttrKind as template parameter. The Attributor | ||||||||
66 | // manifestation framework will then create and place a new attribute if it is | ||||||||
67 | // allowed to do so (based on the abstract state). Other use cases can be | ||||||||
68 | // achieved by overloading AbstractAttribute or IRAttribute methods. | ||||||||
69 | // | ||||||||
70 | // | ||||||||
71 | // The "mechanics" of adding a new "abstract attribute": | ||||||||
72 | // - Define a class (transitively) inheriting from AbstractAttribute and one | ||||||||
73 | // (which could be the same) that (transitively) inherits from AbstractState. | ||||||||
74 | // For the latter, consider the already available BooleanState and | ||||||||
75 | // {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., you require only a | ||||||||
76 | // number tracking or bit-encoding. | ||||||||
77 | // - Implement all pure methods. Also use overloading if the attribute is not | ||||||||
78 | // conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for | ||||||||
79 | // an argument, call site argument, function return value, or function. See | ||||||||
80 | // the class and method descriptions for more information on the two | ||||||||
81 | // "Abstract" classes and their respective methods. | ||||||||
82 | // - Register opportunities for the new abstract attribute in the | ||||||||
83 | // `Attributor::identifyDefaultAbstractAttributes` method if it should be | ||||||||
84 | // counted as a 'default' attribute. | ||||||||
85 | // - Add sufficient tests. | ||||||||
86 | // - Add a Statistics object for bookkeeping. If it is a simple (set of) | ||||||||
87 | // attribute(s) manifested through the Attributor manifestation framework, see | ||||||||
88 | // the bookkeeping function in Attributor.cpp. | ||||||||
89 | // - If instructions with a certain opcode are interesting to the attribute, add | ||||||||
90 | // that opcode to the switch in `Attributor::identifyAbstractAttributes`. This | ||||||||
91 | // will make it possible to query all those instructions through the | ||||||||
92 | // `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the | ||||||||
93 | // need to traverse the IR repeatedly. | ||||||||
94 | // | ||||||||
95 | //===----------------------------------------------------------------------===// | ||||||||
96 | |||||||||
97 | #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H | ||||||||
98 | #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H | ||||||||
99 | |||||||||
100 | #include "llvm/ADT/DenseSet.h" | ||||||||
101 | #include "llvm/ADT/GraphTraits.h" | ||||||||
102 | #include "llvm/ADT/MapVector.h" | ||||||||
103 | #include "llvm/ADT/STLExtras.h" | ||||||||
104 | #include "llvm/ADT/SetOperations.h" | ||||||||
105 | #include "llvm/ADT/SetVector.h" | ||||||||
106 | #include "llvm/ADT/Triple.h" | ||||||||
107 | #include "llvm/ADT/iterator.h" | ||||||||
108 | #include "llvm/Analysis/AssumeBundleQueries.h" | ||||||||
109 | #include "llvm/Analysis/CFG.h" | ||||||||
110 | #include "llvm/Analysis/CGSCCPassManager.h" | ||||||||
111 | #include "llvm/Analysis/LazyCallGraph.h" | ||||||||
112 | #include "llvm/Analysis/LoopInfo.h" | ||||||||
113 | #include "llvm/Analysis/MustExecute.h" | ||||||||
114 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||||||||
115 | #include "llvm/Analysis/PostDominators.h" | ||||||||
116 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||||||||
117 | #include "llvm/IR/AbstractCallSite.h" | ||||||||
118 | #include "llvm/IR/ConstantRange.h" | ||||||||
119 | #include "llvm/IR/PassManager.h" | ||||||||
120 | #include "llvm/Support/Allocator.h" | ||||||||
121 | #include "llvm/Support/Casting.h" | ||||||||
122 | #include "llvm/Support/GraphWriter.h" | ||||||||
123 | #include "llvm/Support/TimeProfiler.h" | ||||||||
124 | #include "llvm/Transforms/Utils/CallGraphUpdater.h" | ||||||||
125 | |||||||||
126 | namespace llvm { | ||||||||
127 | |||||||||
128 | struct AADepGraphNode; | ||||||||
129 | struct AADepGraph; | ||||||||
130 | struct Attributor; | ||||||||
131 | struct AbstractAttribute; | ||||||||
132 | struct InformationCache; | ||||||||
133 | struct AAIsDead; | ||||||||
134 | struct AttributorCallGraph; | ||||||||
135 | |||||||||
136 | class AAResults; | ||||||||
137 | class Function; | ||||||||
138 | |||||||||
139 | /// Abstract Attribute helper functions. | ||||||||
140 | namespace AA { | ||||||||
141 | |||||||||
142 | /// Return true if \p V is dynamically unique, that is, there are no two | ||||||||
143 | /// "instances" of \p V at runtime with different values. | ||||||||
144 | bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, | ||||||||
145 | const Value &V); | ||||||||
146 | |||||||||
147 | /// Return true if \p V is a valid value in \p Scope, that is a constant or an | ||||||||
148 | /// instruction/argument of \p Scope. | ||||||||
149 | bool isValidInScope(const Value &V, const Function *Scope); | ||||||||
150 | |||||||||
151 | /// Return true if \p V is a valid value at position \p CtxI, that is a | ||||||||
152 | /// constant, an argument of the same function as \p CtxI, or an instruction in | ||||||||
153 | /// that function that dominates \p CtxI. | ||||||||
154 | bool isValidAtPosition(const Value &V, const Instruction &CtxI, | ||||||||
155 | InformationCache &InfoCache); | ||||||||
156 | |||||||||
157 | /// Try to convert \p V to type \p Ty without introducing new instructions. If | ||||||||
158 | /// this is not possible return `nullptr`. Note: this function basically knows | ||||||||
159 | /// how to cast various constants. | ||||||||
160 | Value *getWithType(Value &V, Type &Ty); | ||||||||
161 | |||||||||
162 | /// Return the combination of \p A and \p B such that the result is a possible | ||||||||
163 | /// value of both. \p B is potentially casted to match the type \p Ty or the | ||||||||
164 | /// type of \p A if \p Ty is null. | ||||||||
165 | /// | ||||||||
166 | /// Examples: | ||||||||
167 | /// X + none => X | ||||||||
168 | /// not_none + undef => not_none | ||||||||
169 | /// V1 + V2 => nullptr | ||||||||
170 | Optional<Value *> | ||||||||
171 | combineOptionalValuesInAAValueLatice(const Optional<Value *> &A, | ||||||||
172 | const Optional<Value *> &B, Type *Ty); | ||||||||
173 | |||||||||
174 | /// Return the initial value of \p Obj with type \p Ty if that is a constant. | ||||||||
175 | Constant *getInitialValueForObj(Value &Obj, Type &Ty, | ||||||||
176 | const TargetLibraryInfo *TLI); | ||||||||
177 | |||||||||
178 | /// Collect all potential underlying objects of \p Ptr at position \p CtxI in | ||||||||
179 | /// \p Objects. Assumed information is used and dependences onto \p QueryingAA | ||||||||
180 | /// are added appropriately. | ||||||||
181 | /// | ||||||||
182 | /// \returns True if \p Objects contains all assumed underlying objects, and | ||||||||
183 | /// false if something went wrong and the objects could not be | ||||||||
184 | /// determined. | ||||||||
185 | bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, | ||||||||
186 | SmallVectorImpl<Value *> &Objects, | ||||||||
187 | const AbstractAttribute &QueryingAA, | ||||||||
188 | const Instruction *CtxI); | ||||||||
189 | |||||||||
190 | /// Collect all potential values of the one stored by \p SI into | ||||||||
191 | /// \p PotentialCopies. That is, the only copies that were made via the | ||||||||
192 | /// store are assumed to be known and all in \p PotentialCopies. Dependences | ||||||||
193 | /// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will | ||||||||
194 | /// inform the caller if assumed information was used. | ||||||||
195 | /// | ||||||||
196 | /// \returns True if the assumed potential copies are all in \p PotentialCopies, | ||||||||
197 | /// false if something went wrong and the copies could not be | ||||||||
198 | /// determined. | ||||||||
199 | bool getPotentialCopiesOfStoredValue( | ||||||||
200 | Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies, | ||||||||
201 | const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation); | ||||||||
202 | |||||||||
203 | } // namespace AA | ||||||||
204 | |||||||||
205 | /// The value passed to the line option that defines the maximal initialization | ||||||||
206 | /// chain length. | ||||||||
207 | extern unsigned MaxInitializationChainLength; | ||||||||
208 | |||||||||
///{
/// Result of an update or manifestation step: did anything change?
enum class ChangeStatus {
  CHANGED,   ///< The state (or IR) was modified.
  UNCHANGED, ///< Nothing was modified.
};

/// Combinators over change statuses (declared here, defined elsewhere).
ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
ChangeStatus &operator|=(ChangeStatus &l, ChangeStatus r);
ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
ChangeStatus &operator&=(ChangeStatus &l, ChangeStatus r);

/// Classification of a dependence between two abstract attributes.
enum class DepClassTy {
  REQUIRED, ///< The target cannot be valid if the source is not.
  OPTIONAL, ///< The target may be valid if the source is not.
  NONE,     ///< Do not track a dependence between source and target.
};
///}
226 | |||||||||
/// The data structure for the nodes of a dependency graph
struct AADepGraphNode {
public:
  virtual ~AADepGraphNode(){};
  /// A dependence edge. The pointer is the target node; the low bit encodes
  /// whether the dependence is optional.
  using DepTy = PointerIntPair<AADepGraphNode *, 1>;

protected:
  /// Set of dependency graph nodes which should be updated if this one
  /// is updated. The bit encodes if it is optional.
  TinyPtrVector<DepTy> Deps;

  /// Strip the edge encoding and return the target node.
  static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
  /// Strip the edge encoding and return the target as an AbstractAttribute.
  static AbstractAttribute *DepGetValAA(DepTy &DT) {
    return cast<AbstractAttribute>(DT.getPointer());
  }

  // Nodes are always embedded in AbstractAttribute objects, so the downcast
  // is valid.
  operator AbstractAttribute *() { return cast<AbstractAttribute>(this); }

public:
  /// Iterator over successors as plain graph nodes.
  using iterator =
      mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
  /// Iterator over successors as abstract attributes.
  using aaiterator =
      mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>;

  aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); }
  aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); }
  iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); }
  iterator child_end() { return iterator(Deps.end(), &DepGetVal); }

  virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; }
  TinyPtrVector<DepTy> &getDeps() { return Deps; }

  friend struct Attributor;
  friend struct AADepGraph;
};
262 | |||||||||
/// The data structure for the dependency graph
///
/// Note that in this graph if there is an edge from A to B (A -> B),
/// then it means that B depends on A, and when the state of A is
/// updated, node B should also be updated
struct AADepGraph {
  AADepGraph() {}
  ~AADepGraph() {}

  using DepTy = AADepGraphNode::DepTy;
  /// Strip the edge encoding and return the pointed-to node.
  static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
  using iterator =
      mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;

  /// There is no root node for the dependency graph. But the SCCIterator
  /// requires a single entry point, so we maintain a fake("synthetic") root
  /// node that depends on every node.
  AADepGraphNode SyntheticRoot;
  AADepGraphNode *GetEntryNode() { return &SyntheticRoot; }

  /// Iterate over all nodes, i.e., the children of the synthetic root.
  iterator begin() { return SyntheticRoot.child_begin(); }
  iterator end() { return SyntheticRoot.child_end(); }

  void viewGraph();

  /// Dump graph to file
  void dumpGraph();

  /// Print dependency graph
  void print();
};
294 | |||||||||
295 | /// Helper to describe and deal with positions in the LLVM-IR. | ||||||||
296 | /// | ||||||||
297 | /// A position in the IR is described by an anchor value and an "offset" that | ||||||||
298 | /// could be the argument number, for call sites and arguments, or an indicator | ||||||||
299 | /// of the "position kind". The kinds, specified in the Kind enum below, include | ||||||||
300 | /// the locations in the attribute list, i.a., function scope and return value, | ||||||||
301 | /// as well as a distinction between call sites and functions. Finally, there | ||||||||
302 | /// are floating values that do not have a corresponding attribute list | ||||||||
303 | /// position. | ||||||||
304 | struct IRPosition { | ||||||||
305 | // NOTE: In the future this definition can be changed to support recursive | ||||||||
306 | // functions. | ||||||||
307 | using CallBaseContext = CallBase; | ||||||||
308 | |||||||||
309 | /// The positions we distinguish in the IR. | ||||||||
310 | enum Kind : char { | ||||||||
311 | IRP_INVALID, ///< An invalid position. | ||||||||
312 | IRP_FLOAT, ///< A position that is not associated with a spot suitable | ||||||||
313 | ///< for attributes. This could be any value or instruction. | ||||||||
314 | IRP_RETURNED, ///< An attribute for the function return value. | ||||||||
315 | IRP_CALL_SITE_RETURNED, ///< An attribute for a call site return value. | ||||||||
316 | IRP_FUNCTION, ///< An attribute for a function (scope). | ||||||||
317 | IRP_CALL_SITE, ///< An attribute for a call site (function scope). | ||||||||
318 | IRP_ARGUMENT, ///< An attribute for a function argument. | ||||||||
319 | IRP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument. | ||||||||
320 | }; | ||||||||
321 | |||||||||
322 | /// Default constructor available to create invalid positions implicitly. All | ||||||||
323 | /// other positions need to be created explicitly through the appropriate | ||||||||
324 | /// static member function. | ||||||||
325 | IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); } | ||||||||
326 | |||||||||
327 | /// Create a position describing the value of \p V. | ||||||||
328 | static const IRPosition value(const Value &V, | ||||||||
329 | const CallBaseContext *CBContext = nullptr) { | ||||||||
330 | if (auto *Arg = dyn_cast<Argument>(&V)) | ||||||||
331 | return IRPosition::argument(*Arg, CBContext); | ||||||||
332 | if (auto *CB = dyn_cast<CallBase>(&V)) | ||||||||
333 | return IRPosition::callsite_returned(*CB); | ||||||||
334 | return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext); | ||||||||
335 | } | ||||||||
336 | |||||||||
337 | /// Create a position describing the function scope of \p F. | ||||||||
338 | /// \p CBContext is used for call base specific analysis. | ||||||||
339 | static const IRPosition function(const Function &F, | ||||||||
340 | const CallBaseContext *CBContext = nullptr) { | ||||||||
341 | return IRPosition(const_cast<Function &>(F), IRP_FUNCTION, CBContext); | ||||||||
342 | } | ||||||||
343 | |||||||||
344 | /// Create a position describing the returned value of \p F. | ||||||||
345 | /// \p CBContext is used for call base specific analysis. | ||||||||
346 | static const IRPosition returned(const Function &F, | ||||||||
347 | const CallBaseContext *CBContext = nullptr) { | ||||||||
348 | return IRPosition(const_cast<Function &>(F), IRP_RETURNED, CBContext); | ||||||||
349 | } | ||||||||
350 | |||||||||
351 | /// Create a position describing the argument \p Arg. | ||||||||
352 | /// \p CBContext is used for call base specific analysis. | ||||||||
353 | static const IRPosition argument(const Argument &Arg, | ||||||||
354 | const CallBaseContext *CBContext = nullptr) { | ||||||||
355 | return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT, CBContext); | ||||||||
356 | } | ||||||||
357 | |||||||||
358 | /// Create a position describing the function scope of \p CB. | ||||||||
359 | static const IRPosition callsite_function(const CallBase &CB) { | ||||||||
360 | return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE); | ||||||||
361 | } | ||||||||
362 | |||||||||
363 | /// Create a position describing the returned value of \p CB. | ||||||||
364 | static const IRPosition callsite_returned(const CallBase &CB) { | ||||||||
365 | return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED); | ||||||||
366 | } | ||||||||
367 | |||||||||
368 | /// Create a position describing the argument of \p CB at position \p ArgNo. | ||||||||
369 | static const IRPosition callsite_argument(const CallBase &CB, | ||||||||
370 | unsigned ArgNo) { | ||||||||
371 | return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)), | ||||||||
372 | IRP_CALL_SITE_ARGUMENT); | ||||||||
373 | } | ||||||||
374 | |||||||||
375 | /// Create a position describing the argument of \p ACS at position \p ArgNo. | ||||||||
376 | static const IRPosition callsite_argument(AbstractCallSite ACS, | ||||||||
377 | unsigned ArgNo) { | ||||||||
378 | if (ACS.getNumArgOperands() <= ArgNo) | ||||||||
379 | return IRPosition(); | ||||||||
380 | int CSArgNo = ACS.getCallArgOperandNo(ArgNo); | ||||||||
381 | if (CSArgNo >= 0) | ||||||||
382 | return IRPosition::callsite_argument( | ||||||||
383 | cast<CallBase>(*ACS.getInstruction()), CSArgNo); | ||||||||
384 | return IRPosition(); | ||||||||
385 | } | ||||||||
386 | |||||||||
387 | /// Create a position with function scope matching the "context" of \p IRP. | ||||||||
388 | /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result | ||||||||
389 | /// will be a call site position, otherwise the function position of the | ||||||||
390 | /// associated function. | ||||||||
391 | static const IRPosition | ||||||||
392 | function_scope(const IRPosition &IRP, | ||||||||
393 | const CallBaseContext *CBContext = nullptr) { | ||||||||
394 | if (IRP.isAnyCallSitePosition()) { | ||||||||
395 | return IRPosition::callsite_function( | ||||||||
396 | cast<CallBase>(IRP.getAnchorValue())); | ||||||||
397 | } | ||||||||
398 | assert(IRP.getAssociatedFunction())(static_cast <bool> (IRP.getAssociatedFunction()) ? void (0) : __assert_fail ("IRP.getAssociatedFunction()", "llvm/include/llvm/Transforms/IPO/Attributor.h" , 398, __extension__ __PRETTY_FUNCTION__)); | ||||||||
399 | return IRPosition::function(*IRP.getAssociatedFunction(), CBContext); | ||||||||
400 | } | ||||||||
401 | |||||||||
402 | bool operator==(const IRPosition &RHS) const { | ||||||||
403 | return Enc == RHS.Enc && RHS.CBContext == CBContext; | ||||||||
404 | } | ||||||||
405 | bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); } | ||||||||
406 | |||||||||
407 | /// Return the value this abstract attribute is anchored with. | ||||||||
408 | /// | ||||||||
409 | /// The anchor value might not be the associated value if the latter is not | ||||||||
410 | /// sufficient to determine where arguments will be manifested. This is, so | ||||||||
411 | /// far, only the case for call site arguments as the value is not sufficient | ||||||||
412 | /// to pinpoint them. Instead, we can use the call site as an anchor. | ||||||||
413 | Value &getAnchorValue() const { | ||||||||
414 | switch (getEncodingBits()) { | ||||||||
415 | case ENC_VALUE: | ||||||||
416 | case ENC_RETURNED_VALUE: | ||||||||
417 | case ENC_FLOATING_FUNCTION: | ||||||||
418 | return *getAsValuePtr(); | ||||||||
419 | case ENC_CALL_SITE_ARGUMENT_USE: | ||||||||
420 | return *(getAsUsePtr()->getUser()); | ||||||||
421 | default: | ||||||||
422 | llvm_unreachable("Unkown encoding!")::llvm::llvm_unreachable_internal("Unkown encoding!", "llvm/include/llvm/Transforms/IPO/Attributor.h" , 422); | ||||||||
423 | }; | ||||||||
424 | } | ||||||||
425 | |||||||||
426 | /// Return the associated function, if any. | ||||||||
427 | Function *getAssociatedFunction() const { | ||||||||
428 | if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) { | ||||||||
// We reuse the logic that associates callback callees to arguments of a
430 | // call site here to identify the callback callee as the associated | ||||||||
431 | // function. | ||||||||
432 | if (Argument *Arg = getAssociatedArgument()) | ||||||||
433 | return Arg->getParent(); | ||||||||
434 | return CB->getCalledFunction(); | ||||||||
435 | } | ||||||||
436 | return getAnchorScope(); | ||||||||
437 | } | ||||||||
438 | |||||||||
439 | /// Return the associated argument, if any. | ||||||||
440 | Argument *getAssociatedArgument() const; | ||||||||
441 | |||||||||
442 | /// Return true if the position refers to a function interface, that is the | ||||||||
443 | /// function scope, the function return, or an argument. | ||||||||
444 | bool isFnInterfaceKind() const { | ||||||||
445 | switch (getPositionKind()) { | ||||||||
446 | case IRPosition::IRP_FUNCTION: | ||||||||
447 | case IRPosition::IRP_RETURNED: | ||||||||
448 | case IRPosition::IRP_ARGUMENT: | ||||||||
449 | return true; | ||||||||
450 | default: | ||||||||
451 | return false; | ||||||||
452 | } | ||||||||
453 | } | ||||||||
454 | |||||||||
455 | /// Return the Function surrounding the anchor value. | ||||||||
456 | Function *getAnchorScope() const { | ||||||||
457 | Value &V = getAnchorValue(); | ||||||||
458 | if (isa<Function>(V)) | ||||||||
459 | return &cast<Function>(V); | ||||||||
460 | if (isa<Argument>(V)) | ||||||||
461 | return cast<Argument>(V).getParent(); | ||||||||
462 | if (isa<Instruction>(V)) | ||||||||
463 | return cast<Instruction>(V).getFunction(); | ||||||||
464 | return nullptr; | ||||||||
465 | } | ||||||||
466 | |||||||||
467 | /// Return the context instruction, if any. | ||||||||
468 | Instruction *getCtxI() const { | ||||||||
469 | Value &V = getAnchorValue(); | ||||||||
470 | if (auto *I = dyn_cast<Instruction>(&V)) | ||||||||
471 | return I; | ||||||||
472 | if (auto *Arg = dyn_cast<Argument>(&V)) | ||||||||
473 | if (!Arg->getParent()->isDeclaration()) | ||||||||
474 | return &Arg->getParent()->getEntryBlock().front(); | ||||||||
475 | if (auto *F = dyn_cast<Function>(&V)) | ||||||||
476 | if (!F->isDeclaration()) | ||||||||
477 | return &(F->getEntryBlock().front()); | ||||||||
478 | return nullptr; | ||||||||
479 | } | ||||||||
480 | |||||||||
481 | /// Return the value this abstract attribute is associated with. | ||||||||
482 | Value &getAssociatedValue() const { | ||||||||
483 | if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue())) | ||||||||
484 | return getAnchorValue(); | ||||||||
485 | assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!")(static_cast <bool> (isa<CallBase>(&getAnchorValue ()) && "Expected a call base!") ? void (0) : __assert_fail ("isa<CallBase>(&getAnchorValue()) && \"Expected a call base!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 485, __extension__ __PRETTY_FUNCTION__)); | ||||||||
486 | return *cast<CallBase>(&getAnchorValue()) | ||||||||
487 | ->getArgOperand(getCallSiteArgNo()); | ||||||||
488 | } | ||||||||
489 | |||||||||
490 | /// Return the type this abstract attribute is associated with. | ||||||||
491 | Type *getAssociatedType() const { | ||||||||
492 | if (getPositionKind() == IRPosition::IRP_RETURNED) | ||||||||
493 | return getAssociatedFunction()->getReturnType(); | ||||||||
494 | return getAssociatedValue().getType(); | ||||||||
495 | } | ||||||||
496 | |||||||||
497 | /// Return the callee argument number of the associated value if it is an | ||||||||
498 | /// argument or call site argument, otherwise a negative value. In contrast to | ||||||||
499 | /// `getCallSiteArgNo` this method will always return the "argument number" | ||||||||
500 | /// from the perspective of the callee. This may not the same as the call site | ||||||||
501 | /// if this is a callback call. | ||||||||
502 | int getCalleeArgNo() const { | ||||||||
503 | return getArgNo(/* CallbackCalleeArgIfApplicable */ true); | ||||||||
504 | } | ||||||||
505 | |||||||||
506 | /// Return the call site argument number of the associated value if it is an | ||||||||
507 | /// argument or call site argument, otherwise a negative value. In contrast to | ||||||||
508 | /// `getCalleArgNo` this method will always return the "operand number" from | ||||||||
509 | /// the perspective of the call site. This may not the same as the callee | ||||||||
510 | /// perspective if this is a callback call. | ||||||||
511 | int getCallSiteArgNo() const { | ||||||||
512 | return getArgNo(/* CallbackCalleeArgIfApplicable */ false); | ||||||||
513 | } | ||||||||
514 | |||||||||
515 | /// Return the index in the attribute list for this position. | ||||||||
516 | unsigned getAttrIdx() const { | ||||||||
517 | switch (getPositionKind()) { | ||||||||
518 | case IRPosition::IRP_INVALID: | ||||||||
519 | case IRPosition::IRP_FLOAT: | ||||||||
520 | break; | ||||||||
521 | case IRPosition::IRP_FUNCTION: | ||||||||
522 | case IRPosition::IRP_CALL_SITE: | ||||||||
523 | return AttributeList::FunctionIndex; | ||||||||
524 | case IRPosition::IRP_RETURNED: | ||||||||
525 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||||||
526 | return AttributeList::ReturnIndex; | ||||||||
527 | case IRPosition::IRP_ARGUMENT: | ||||||||
528 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
529 | return getCallSiteArgNo() + AttributeList::FirstArgIndex; | ||||||||
530 | } | ||||||||
531 | llvm_unreachable(::llvm::llvm_unreachable_internal("There is no attribute index for a floating or invalid position!" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 532) | ||||||||
532 | "There is no attribute index for a floating or invalid position!")::llvm::llvm_unreachable_internal("There is no attribute index for a floating or invalid position!" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 532); | ||||||||
533 | } | ||||||||
534 | |||||||||
535 | /// Return the associated position kind. | ||||||||
536 | Kind getPositionKind() const { | ||||||||
537 | char EncodingBits = getEncodingBits(); | ||||||||
538 | if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE) | ||||||||
539 | return IRP_CALL_SITE_ARGUMENT; | ||||||||
540 | if (EncodingBits == ENC_FLOATING_FUNCTION) | ||||||||
541 | return IRP_FLOAT; | ||||||||
542 | |||||||||
543 | Value *V = getAsValuePtr(); | ||||||||
544 | if (!V) | ||||||||
545 | return IRP_INVALID; | ||||||||
546 | if (isa<Argument>(V)) | ||||||||
547 | return IRP_ARGUMENT; | ||||||||
548 | if (isa<Function>(V)) | ||||||||
549 | return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION; | ||||||||
550 | if (isa<CallBase>(V)) | ||||||||
551 | return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED | ||||||||
552 | : IRP_CALL_SITE; | ||||||||
553 | return IRP_FLOAT; | ||||||||
554 | } | ||||||||
555 | |||||||||
556 | /// TODO: Figure out if the attribute related helper functions should live | ||||||||
557 | /// here or somewhere else. | ||||||||
558 | |||||||||
559 | /// Return true if any kind in \p AKs existing in the IR at a position that | ||||||||
560 | /// will affect this one. See also getAttrs(...). | ||||||||
561 | /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, | ||||||||
562 | /// e.g., the function position if this is an | ||||||||
563 | /// argument position, should be ignored. | ||||||||
564 | bool hasAttr(ArrayRef<Attribute::AttrKind> AKs, | ||||||||
565 | bool IgnoreSubsumingPositions = false, | ||||||||
566 | Attributor *A = nullptr) const; | ||||||||
567 | |||||||||
568 | /// Return the attributes of any kind in \p AKs existing in the IR at a | ||||||||
569 | /// position that will affect this one. While each position can only have a | ||||||||
570 | /// single attribute of any kind in \p AKs, there are "subsuming" positions | ||||||||
571 | /// that could have an attribute as well. This method returns all attributes | ||||||||
572 | /// found in \p Attrs. | ||||||||
573 | /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, | ||||||||
574 | /// e.g., the function position if this is an | ||||||||
575 | /// argument position, should be ignored. | ||||||||
576 | void getAttrs(ArrayRef<Attribute::AttrKind> AKs, | ||||||||
577 | SmallVectorImpl<Attribute> &Attrs, | ||||||||
578 | bool IgnoreSubsumingPositions = false, | ||||||||
579 | Attributor *A = nullptr) const; | ||||||||
580 | |||||||||
581 | /// Remove the attribute of kind \p AKs existing in the IR at this position. | ||||||||
582 | void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const { | ||||||||
583 | if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) | ||||||||
584 | return; | ||||||||
585 | |||||||||
586 | AttributeList AttrList; | ||||||||
587 | auto *CB = dyn_cast<CallBase>(&getAnchorValue()); | ||||||||
588 | if (CB) | ||||||||
589 | AttrList = CB->getAttributes(); | ||||||||
590 | else | ||||||||
591 | AttrList = getAssociatedFunction()->getAttributes(); | ||||||||
592 | |||||||||
593 | LLVMContext &Ctx = getAnchorValue().getContext(); | ||||||||
594 | for (Attribute::AttrKind AK : AKs) | ||||||||
595 | AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK); | ||||||||
596 | |||||||||
597 | if (CB) | ||||||||
598 | CB->setAttributes(AttrList); | ||||||||
599 | else | ||||||||
600 | getAssociatedFunction()->setAttributes(AttrList); | ||||||||
601 | } | ||||||||
602 | |||||||||
603 | bool isAnyCallSitePosition() const { | ||||||||
604 | switch (getPositionKind()) { | ||||||||
605 | case IRPosition::IRP_CALL_SITE: | ||||||||
606 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||||||
607 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
608 | return true; | ||||||||
609 | default: | ||||||||
610 | return false; | ||||||||
611 | } | ||||||||
612 | } | ||||||||
613 | |||||||||
614 | /// Return true if the position is an argument or call site argument. | ||||||||
615 | bool isArgumentPosition() const { | ||||||||
616 | switch (getPositionKind()) { | ||||||||
617 | case IRPosition::IRP_ARGUMENT: | ||||||||
618 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
619 | return true; | ||||||||
620 | default: | ||||||||
621 | return false; | ||||||||
622 | } | ||||||||
623 | } | ||||||||
624 | |||||||||
625 | /// Return the same position without the call base context. | ||||||||
626 | IRPosition stripCallBaseContext() const { | ||||||||
627 | IRPosition Result = *this; | ||||||||
628 | Result.CBContext = nullptr; | ||||||||
629 | return Result; | ||||||||
630 | } | ||||||||
631 | |||||||||
632 | /// Get the call base context from the position. | ||||||||
633 | const CallBaseContext *getCallBaseContext() const { return CBContext; } | ||||||||
634 | |||||||||
635 | /// Check if the position has any call base context. | ||||||||
636 | bool hasCallBaseContext() const { return CBContext != nullptr; } | ||||||||
637 | |||||||||
638 | /// Special DenseMap key values. | ||||||||
639 | /// | ||||||||
640 | ///{ | ||||||||
641 | static const IRPosition EmptyKey; | ||||||||
642 | static const IRPosition TombstoneKey; | ||||||||
643 | ///} | ||||||||
644 | |||||||||
645 | /// Conversion into a void * to allow reuse of pointer hashing. | ||||||||
646 | operator void *() const { return Enc.getOpaqueValue(); } | ||||||||
647 | |||||||||
648 | private: | ||||||||
649 | /// Private constructor for special values only! | ||||||||
650 | explicit IRPosition(void *Ptr, const CallBaseContext *CBContext = nullptr) | ||||||||
651 | : CBContext(CBContext) { | ||||||||
652 | Enc.setFromOpaqueValue(Ptr); | ||||||||
653 | } | ||||||||
654 | |||||||||
655 | /// IRPosition anchored at \p AnchorVal with kind/argument numbet \p PK. | ||||||||
656 | explicit IRPosition(Value &AnchorVal, Kind PK, | ||||||||
657 | const CallBaseContext *CBContext = nullptr) | ||||||||
658 | : CBContext(CBContext) { | ||||||||
659 | switch (PK) { | ||||||||
660 | case IRPosition::IRP_INVALID: | ||||||||
661 | llvm_unreachable("Cannot create invalid IRP with an anchor value!")::llvm::llvm_unreachable_internal("Cannot create invalid IRP with an anchor value!" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 661); | ||||||||
662 | break; | ||||||||
663 | case IRPosition::IRP_FLOAT: | ||||||||
664 | // Special case for floating functions. | ||||||||
665 | if (isa<Function>(AnchorVal)) | ||||||||
666 | Enc = {&AnchorVal, ENC_FLOATING_FUNCTION}; | ||||||||
667 | else | ||||||||
668 | Enc = {&AnchorVal, ENC_VALUE}; | ||||||||
669 | break; | ||||||||
670 | case IRPosition::IRP_FUNCTION: | ||||||||
671 | case IRPosition::IRP_CALL_SITE: | ||||||||
672 | Enc = {&AnchorVal, ENC_VALUE}; | ||||||||
673 | break; | ||||||||
674 | case IRPosition::IRP_RETURNED: | ||||||||
675 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||||||
676 | Enc = {&AnchorVal, ENC_RETURNED_VALUE}; | ||||||||
677 | break; | ||||||||
678 | case IRPosition::IRP_ARGUMENT: | ||||||||
679 | Enc = {&AnchorVal, ENC_VALUE}; | ||||||||
680 | break; | ||||||||
681 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
682 | llvm_unreachable(::llvm::llvm_unreachable_internal("Cannot create call site argument IRP with an anchor value!" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 683) | ||||||||
683 | "Cannot create call site argument IRP with an anchor value!")::llvm::llvm_unreachable_internal("Cannot create call site argument IRP with an anchor value!" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 683); | ||||||||
684 | break; | ||||||||
685 | } | ||||||||
686 | verify(); | ||||||||
687 | } | ||||||||
688 | |||||||||
689 | /// Return the callee argument number of the associated value if it is an | ||||||||
690 | /// argument or call site argument. See also `getCalleeArgNo` and | ||||||||
691 | /// `getCallSiteArgNo`. | ||||||||
692 | int getArgNo(bool CallbackCalleeArgIfApplicable) const { | ||||||||
693 | if (CallbackCalleeArgIfApplicable) | ||||||||
694 | if (Argument *Arg = getAssociatedArgument()) | ||||||||
695 | return Arg->getArgNo(); | ||||||||
696 | switch (getPositionKind()) { | ||||||||
697 | case IRPosition::IRP_ARGUMENT: | ||||||||
698 | return cast<Argument>(getAsValuePtr())->getArgNo(); | ||||||||
699 | case IRPosition::IRP_CALL_SITE_ARGUMENT: { | ||||||||
700 | Use &U = *getAsUsePtr(); | ||||||||
701 | return cast<CallBase>(U.getUser())->getArgOperandNo(&U); | ||||||||
702 | } | ||||||||
703 | default: | ||||||||
704 | return -1; | ||||||||
705 | } | ||||||||
706 | } | ||||||||
707 | |||||||||
708 | /// IRPosition for the use \p U. The position kind \p PK needs to be | ||||||||
709 | /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value | ||||||||
710 | /// the used value. | ||||||||
711 | explicit IRPosition(Use &U, Kind PK) { | ||||||||
712 | assert(PK == IRP_CALL_SITE_ARGUMENT &&(static_cast <bool> (PK == IRP_CALL_SITE_ARGUMENT && "Use constructor is for call site arguments only!") ? void ( 0) : __assert_fail ("PK == IRP_CALL_SITE_ARGUMENT && \"Use constructor is for call site arguments only!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 713, __extension__ __PRETTY_FUNCTION__)) | ||||||||
713 | "Use constructor is for call site arguments only!")(static_cast <bool> (PK == IRP_CALL_SITE_ARGUMENT && "Use constructor is for call site arguments only!") ? void ( 0) : __assert_fail ("PK == IRP_CALL_SITE_ARGUMENT && \"Use constructor is for call site arguments only!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 713, __extension__ __PRETTY_FUNCTION__)); | ||||||||
714 | Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE}; | ||||||||
715 | verify(); | ||||||||
716 | } | ||||||||
717 | |||||||||
718 | /// Verify internal invariants. | ||||||||
719 | void verify(); | ||||||||
720 | |||||||||
721 | /// Return the attributes of kind \p AK existing in the IR as attribute. | ||||||||
722 | bool getAttrsFromIRAttr(Attribute::AttrKind AK, | ||||||||
723 | SmallVectorImpl<Attribute> &Attrs) const; | ||||||||
724 | |||||||||
725 | /// Return the attributes of kind \p AK existing in the IR as operand bundles | ||||||||
726 | /// of an llvm.assume. | ||||||||
727 | bool getAttrsFromAssumes(Attribute::AttrKind AK, | ||||||||
728 | SmallVectorImpl<Attribute> &Attrs, | ||||||||
729 | Attributor &A) const; | ||||||||
730 | |||||||||
731 | /// Return the underlying pointer as Value *, valid for all positions but | ||||||||
732 | /// IRP_CALL_SITE_ARGUMENT. | ||||||||
733 | Value *getAsValuePtr() const { | ||||||||
734 | assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE &&(static_cast <bool> (getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 735, __extension__ __PRETTY_FUNCTION__)) | ||||||||
735 | "Not a value pointer!")(static_cast <bool> (getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 735, __extension__ __PRETTY_FUNCTION__)); | ||||||||
736 | return reinterpret_cast<Value *>(Enc.getPointer()); | ||||||||
737 | } | ||||||||
738 | |||||||||
739 | /// Return the underlying pointer as Use *, valid only for | ||||||||
740 | /// IRP_CALL_SITE_ARGUMENT positions. | ||||||||
741 | Use *getAsUsePtr() const { | ||||||||
742 | assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE &&(static_cast <bool> (getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 743, __extension__ __PRETTY_FUNCTION__)) | ||||||||
743 | "Not a value pointer!")(static_cast <bool> (getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 743, __extension__ __PRETTY_FUNCTION__)); | ||||||||
744 | return reinterpret_cast<Use *>(Enc.getPointer()); | ||||||||
745 | } | ||||||||
746 | |||||||||
747 | /// Return true if \p EncodingBits describe a returned or call site returned | ||||||||
748 | /// position. | ||||||||
749 | static bool isReturnPosition(char EncodingBits) { | ||||||||
750 | return EncodingBits == ENC_RETURNED_VALUE; | ||||||||
751 | } | ||||||||
752 | |||||||||
753 | /// Return true if the encoding bits describe a returned or call site returned | ||||||||
754 | /// position. | ||||||||
755 | bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); } | ||||||||
756 | |||||||||
757 | /// The encoding of the IRPosition is a combination of a pointer and two | ||||||||
758 | /// encoding bits. The values of the encoding bits are defined in the enum | ||||||||
759 | /// below. The pointer is either a Value* (for the first three encoding bit | ||||||||
760 | /// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE). | ||||||||
761 | /// | ||||||||
762 | ///{ | ||||||||
763 | enum { | ||||||||
764 | ENC_VALUE = 0b00, | ||||||||
765 | ENC_RETURNED_VALUE = 0b01, | ||||||||
766 | ENC_FLOATING_FUNCTION = 0b10, | ||||||||
767 | ENC_CALL_SITE_ARGUMENT_USE = 0b11, | ||||||||
768 | }; | ||||||||
769 | |||||||||
770 | // Reserve the maximal amount of bits so there is no need to mask out the | ||||||||
771 | // remaining ones. We will not encode anything else in the pointer anyway. | ||||||||
772 | static constexpr int NumEncodingBits = | ||||||||
773 | PointerLikeTypeTraits<void *>::NumLowBitsAvailable; | ||||||||
774 | static_assert(NumEncodingBits >= 2, "At least two bits are required!"); | ||||||||
775 | |||||||||
776 | /// The pointer with the encoding bits. | ||||||||
777 | PointerIntPair<void *, NumEncodingBits, char> Enc; | ||||||||
778 | ///} | ||||||||
779 | |||||||||
780 | /// Call base context. Used for callsite specific analysis. | ||||||||
781 | const CallBaseContext *CBContext = nullptr; | ||||||||
782 | |||||||||
783 | /// Return the encoding bits. | ||||||||
784 | char getEncodingBits() const { return Enc.getInt(); } | ||||||||
785 | }; | ||||||||
786 | |||||||||
787 | /// Helper that allows IRPosition as a key in a DenseMap. | ||||||||
788 | template <> struct DenseMapInfo<IRPosition> { | ||||||||
789 | static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; } | ||||||||
790 | static inline IRPosition getTombstoneKey() { | ||||||||
791 | return IRPosition::TombstoneKey; | ||||||||
792 | } | ||||||||
793 | static unsigned getHashValue(const IRPosition &IRP) { | ||||||||
794 | return (DenseMapInfo<void *>::getHashValue(IRP) << 4) ^ | ||||||||
795 | (DenseMapInfo<Value *>::getHashValue(IRP.getCallBaseContext())); | ||||||||
796 | } | ||||||||
797 | |||||||||
798 | static bool isEqual(const IRPosition &a, const IRPosition &b) { | ||||||||
799 | return a == b; | ||||||||
800 | } | ||||||||
801 | }; | ||||||||
802 | |||||||||
803 | /// A visitor class for IR positions. | ||||||||
804 | /// | ||||||||
805 | /// Given a position P, the SubsumingPositionIterator allows to visit "subsuming | ||||||||
806 | /// positions" wrt. attributes/information. Thus, if a piece of information | ||||||||
807 | /// holds for a subsuming position, it also holds for the position P. | ||||||||
808 | /// | ||||||||
809 | /// The subsuming positions always include the initial position and then, | ||||||||
810 | /// depending on the position kind, additionally the following ones: | ||||||||
811 | /// - for IRP_RETURNED: | ||||||||
812 | /// - the function (IRP_FUNCTION) | ||||||||
813 | /// - for IRP_ARGUMENT: | ||||||||
814 | /// - the function (IRP_FUNCTION) | ||||||||
815 | /// - for IRP_CALL_SITE: | ||||||||
816 | /// - the callee (IRP_FUNCTION), if known | ||||||||
817 | /// - for IRP_CALL_SITE_RETURNED: | ||||||||
818 | /// - the callee (IRP_RETURNED), if known | ||||||||
819 | /// - the call site (IRP_FUNCTION) | ||||||||
820 | /// - the callee (IRP_FUNCTION), if known | ||||||||
821 | /// - for IRP_CALL_SITE_ARGUMENT: | ||||||||
822 | /// - the argument of the callee (IRP_ARGUMENT), if known | ||||||||
823 | /// - the callee (IRP_FUNCTION), if known | ||||||||
824 | /// - the position the call site argument is associated with if it is not | ||||||||
825 | /// anchored to the call site, e.g., if it is an argument then the argument | ||||||||
826 | /// (IRP_ARGUMENT) | ||||||||
827 | class SubsumingPositionIterator { | ||||||||
828 | SmallVector<IRPosition, 4> IRPositions; | ||||||||
829 | using iterator = decltype(IRPositions)::iterator; | ||||||||
830 | |||||||||
831 | public: | ||||||||
832 | SubsumingPositionIterator(const IRPosition &IRP); | ||||||||
833 | iterator begin() { return IRPositions.begin(); } | ||||||||
834 | iterator end() { return IRPositions.end(); } | ||||||||
835 | }; | ||||||||
836 | |||||||||
/// Wrapper for FunctionAnalysisManager.
838 | struct AnalysisGetter { | ||||||||
839 | template <typename Analysis> | ||||||||
840 | typename Analysis::Result *getAnalysis(const Function &F) { | ||||||||
841 | if (!FAM || !F.getParent()) | ||||||||
842 | return nullptr; | ||||||||
843 | return &FAM->getResult<Analysis>(const_cast<Function &>(F)); | ||||||||
844 | } | ||||||||
845 | |||||||||
846 | AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {} | ||||||||
847 | AnalysisGetter() {} | ||||||||
848 | |||||||||
849 | private: | ||||||||
850 | FunctionAnalysisManager *FAM = nullptr; | ||||||||
851 | }; | ||||||||
852 | |||||||||
853 | /// Data structure to hold cached (LLVM-IR) information. | ||||||||
854 | /// | ||||||||
855 | /// All attributes are given an InformationCache object at creation time to | ||||||||
856 | /// avoid inspection of the IR by all of them individually. This default | ||||||||
857 | /// InformationCache will hold information required by 'default' attributes, | ||||||||
858 | /// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..) | ||||||||
859 | /// is called. | ||||||||
860 | /// | ||||||||
861 | /// If custom abstract attributes, registered manually through | ||||||||
862 | /// Attributor::registerAA(...), need more information, especially if it is not | ||||||||
863 | /// reusable, it is advised to inherit from the InformationCache and cast the | ||||||||
864 | /// instance down in the abstract attributes. | ||||||||
865 | struct InformationCache { | ||||||||
866 | InformationCache(const Module &M, AnalysisGetter &AG, | ||||||||
867 | BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC) | ||||||||
868 | : DL(M.getDataLayout()), Allocator(Allocator), | ||||||||
869 | Explorer( | ||||||||
870 | /* ExploreInterBlock */ true, /* ExploreCFGForward */ true, | ||||||||
871 | /* ExploreCFGBackward */ true, | ||||||||
872 | /* LIGetter */ | ||||||||
873 | [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); }, | ||||||||
874 | /* DTGetter */ | ||||||||
875 | [&](const Function &F) { | ||||||||
876 | return AG.getAnalysis<DominatorTreeAnalysis>(F); | ||||||||
877 | }, | ||||||||
878 | /* PDTGetter */ | ||||||||
879 | [&](const Function &F) { | ||||||||
880 | return AG.getAnalysis<PostDominatorTreeAnalysis>(F); | ||||||||
881 | }), | ||||||||
882 | AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) { | ||||||||
883 | if (CGSCC) | ||||||||
884 | initializeModuleSlice(*CGSCC); | ||||||||
885 | } | ||||||||
886 | |||||||||
887 | ~InformationCache() { | ||||||||
888 | // The FunctionInfo objects are allocated via a BumpPtrAllocator, we call | ||||||||
889 | // the destructor manually. | ||||||||
890 | for (auto &It : FuncInfoMap) | ||||||||
891 | It.getSecond()->~FunctionInfo(); | ||||||||
892 | } | ||||||||
893 | |||||||||
894 | /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is | ||||||||
895 | /// true, constant expression users are not given to \p CB but their uses are | ||||||||
896 | /// traversed transitively. | ||||||||
897 | template <typename CBTy> | ||||||||
898 | static void foreachUse(Function &F, CBTy CB, | ||||||||
899 | bool LookThroughConstantExprUses = true) { | ||||||||
900 | SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses())); | ||||||||
901 | |||||||||
902 | for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) { | ||||||||
903 | Use &U = *Worklist[Idx]; | ||||||||
904 | |||||||||
905 | // Allow use in constant bitcasts and simply look through them. | ||||||||
906 | if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) { | ||||||||
907 | for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses()) | ||||||||
908 | Worklist.push_back(&CEU); | ||||||||
909 | continue; | ||||||||
910 | } | ||||||||
911 | |||||||||
912 | CB(U); | ||||||||
913 | } | ||||||||
914 | } | ||||||||
915 | |||||||||
916 | /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains | ||||||||
917 | /// (a subset of) all functions that we can look at during this SCC traversal. | ||||||||
918 | /// This includes functions (transitively) called from the SCC and the | ||||||||
919 | /// (transitive) callers of SCC functions. We also can look at a function if | ||||||||
920 | /// there is a "reference edge", i.a., if the function somehow uses (!=calls) | ||||||||
921 | /// a function in the SCC or a caller of a function in the SCC. | ||||||||
922 | void initializeModuleSlice(SetVector<Function *> &SCC) { | ||||||||
923 | ModuleSlice.insert(SCC.begin(), SCC.end()); | ||||||||
924 | |||||||||
925 | SmallPtrSet<Function *, 16> Seen; | ||||||||
926 | SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end()); | ||||||||
927 | while (!Worklist.empty()) { | ||||||||
928 | Function *F = Worklist.pop_back_val(); | ||||||||
929 | ModuleSlice.insert(F); | ||||||||
930 | |||||||||
931 | for (Instruction &I : instructions(*F)) | ||||||||
932 | if (auto *CB = dyn_cast<CallBase>(&I)) | ||||||||
933 | if (Function *Callee = CB->getCalledFunction()) | ||||||||
934 | if (Seen.insert(Callee).second) | ||||||||
935 | Worklist.push_back(Callee); | ||||||||
936 | } | ||||||||
937 | |||||||||
938 | Seen.clear(); | ||||||||
939 | Worklist.append(SCC.begin(), SCC.end()); | ||||||||
940 | while (!Worklist.empty()) { | ||||||||
941 | Function *F = Worklist.pop_back_val(); | ||||||||
942 | ModuleSlice.insert(F); | ||||||||
943 | |||||||||
944 | // Traverse all transitive uses. | ||||||||
945 | foreachUse(*F, [&](Use &U) { | ||||||||
946 | if (auto *UsrI = dyn_cast<Instruction>(U.getUser())) | ||||||||
947 | if (Seen.insert(UsrI->getFunction()).second) | ||||||||
948 | Worklist.push_back(UsrI->getFunction()); | ||||||||
949 | }); | ||||||||
950 | } | ||||||||
951 | } | ||||||||
952 | |||||||||
953 | /// The slice of the module we are allowed to look at. | ||||||||
954 | SmallPtrSet<Function *, 8> ModuleSlice; | ||||||||
955 | |||||||||
956 | /// A vector type to hold instructions. | ||||||||
957 | using InstructionVectorTy = SmallVector<Instruction *, 8>; | ||||||||
958 | |||||||||
959 | /// A map type from opcodes to instructions with this opcode. | ||||||||
960 | using OpcodeInstMapTy = DenseMap<unsigned, InstructionVectorTy *>; | ||||||||
961 | |||||||||
962 | /// Return the map that relates "interesting" opcodes with all instructions | ||||||||
963 | /// with that opcode in \p F. | ||||||||
964 | OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) { | ||||||||
965 | return getFunctionInfo(F).OpcodeInstMap; | ||||||||
966 | } | ||||||||
967 | |||||||||
968 | /// Return the instructions in \p F that may read or write memory. | ||||||||
969 | InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) { | ||||||||
970 | return getFunctionInfo(F).RWInsts; | ||||||||
971 | } | ||||||||
972 | |||||||||
973 | /// Return MustBeExecutedContextExplorer | ||||||||
974 | MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() { | ||||||||
975 | return Explorer; | ||||||||
976 | } | ||||||||
977 | |||||||||
978 | /// Return TargetLibraryInfo for function \p F. | ||||||||
979 | TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) { | ||||||||
980 | return AG.getAnalysis<TargetLibraryAnalysis>(F); | ||||||||
981 | } | ||||||||
982 | |||||||||
983 | /// Return AliasAnalysis Result for function \p F. | ||||||||
984 | AAResults *getAAResultsForFunction(const Function &F); | ||||||||
985 | |||||||||
986 | /// Return true if \p Arg is involved in a must-tail call, thus the argument | ||||||||
987 | /// of the caller or callee. | ||||||||
988 | bool isInvolvedInMustTailCall(const Argument &Arg) { | ||||||||
989 | FunctionInfo &FI = getFunctionInfo(*Arg.getParent()); | ||||||||
990 | return FI.CalledViaMustTail || FI.ContainsMustTailCall; | ||||||||
991 | } | ||||||||
992 | |||||||||
993 | /// Return the analysis result from a pass \p AP for function \p F. | ||||||||
994 | template <typename AP> | ||||||||
995 | typename AP::Result *getAnalysisResultForFunction(const Function &F) { | ||||||||
996 | return AG.getAnalysis<AP>(F); | ||||||||
997 | } | ||||||||
998 | |||||||||
999 | /// Return SCC size on call graph for function \p F or 0 if unknown. | ||||||||
1000 | unsigned getSccSize(const Function &F) { | ||||||||
1001 | if (CGSCC && CGSCC->count(const_cast<Function *>(&F))) | ||||||||
1002 | return CGSCC->size(); | ||||||||
1003 | return 0; | ||||||||
1004 | } | ||||||||
1005 | |||||||||
1006 | /// Return datalayout used in the module. | ||||||||
1007 | const DataLayout &getDL() { return DL; } | ||||||||
1008 | |||||||||
1009 | /// Return the map conaining all the knowledge we have from `llvm.assume`s. | ||||||||
1010 | const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; } | ||||||||
1011 | |||||||||
1012 | /// Return if \p To is potentially reachable form \p From or not | ||||||||
1013 | /// If the same query was answered, return cached result | ||||||||
1014 | bool getPotentiallyReachable(const Instruction &From, const Instruction &To) { | ||||||||
1015 | auto KeyPair = std::make_pair(&From, &To); | ||||||||
1016 | auto Iter = PotentiallyReachableMap.find(KeyPair); | ||||||||
1017 | if (Iter != PotentiallyReachableMap.end()) | ||||||||
1018 | return Iter->second; | ||||||||
1019 | const Function &F = *From.getFunction(); | ||||||||
1020 | bool Result = true; | ||||||||
1021 | if (From.getFunction() == To.getFunction()) | ||||||||
1022 | Result = isPotentiallyReachable(&From, &To, nullptr, | ||||||||
1023 | AG.getAnalysis<DominatorTreeAnalysis>(F), | ||||||||
1024 | AG.getAnalysis<LoopAnalysis>(F)); | ||||||||
1025 | PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result)); | ||||||||
1026 | return Result; | ||||||||
1027 | } | ||||||||
1028 | |||||||||
1029 | /// Check whether \p F is part of module slice. | ||||||||
1030 | bool isInModuleSlice(const Function &F) { | ||||||||
1031 | return ModuleSlice.count(const_cast<Function *>(&F)); | ||||||||
1032 | } | ||||||||
1033 | |||||||||
1034 | /// Return true if the stack (llvm::Alloca) can be accessed by other threads. | ||||||||
1035 | bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); } | ||||||||
1036 | |||||||||
1037 | /// Return true if the target is a GPU. | ||||||||
1038 | bool targetIsGPU() { | ||||||||
1039 | return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX(); | ||||||||
1040 | } | ||||||||
1041 | |||||||||
1042 | private: | ||||||||
1043 | struct FunctionInfo { | ||||||||
1044 | ~FunctionInfo(); | ||||||||
1045 | |||||||||
1046 | /// A nested map that remembers all instructions in a function with a | ||||||||
1047 | /// certain instruction opcode (Instruction::getOpcode()). | ||||||||
1048 | OpcodeInstMapTy OpcodeInstMap; | ||||||||
1049 | |||||||||
1050 | /// A map from functions to their instructions that may read or write | ||||||||
1051 | /// memory. | ||||||||
1052 | InstructionVectorTy RWInsts; | ||||||||
1053 | |||||||||
1054 | /// Function is called by a `musttail` call. | ||||||||
1055 | bool CalledViaMustTail; | ||||||||
1056 | |||||||||
1057 | /// Function contains a `musttail` call. | ||||||||
1058 | bool ContainsMustTailCall; | ||||||||
1059 | }; | ||||||||
1060 | |||||||||
  /// A map from functions to information about them.
1062 | DenseMap<const Function *, FunctionInfo *> FuncInfoMap; | ||||||||
1063 | |||||||||
1064 | /// Return information about the function \p F, potentially by creating it. | ||||||||
1065 | FunctionInfo &getFunctionInfo(const Function &F) { | ||||||||
1066 | FunctionInfo *&FI = FuncInfoMap[&F]; | ||||||||
1067 | if (!FI) { | ||||||||
1068 | FI = new (Allocator) FunctionInfo(); | ||||||||
1069 | initializeInformationCache(F, *FI); | ||||||||
1070 | } | ||||||||
1071 | return *FI; | ||||||||
1072 | } | ||||||||
1073 | |||||||||
1074 | /// Initialize the function information cache \p FI for the function \p F. | ||||||||
1075 | /// | ||||||||
1076 | /// This method needs to be called for all function that might be looked at | ||||||||
1077 | /// through the information cache interface *prior* to looking at them. | ||||||||
1078 | void initializeInformationCache(const Function &F, FunctionInfo &FI); | ||||||||
1079 | |||||||||
1080 | /// The datalayout used in the module. | ||||||||
1081 | const DataLayout &DL; | ||||||||
1082 | |||||||||
1083 | /// The allocator used to allocate memory, e.g. for `FunctionInfo`s. | ||||||||
1084 | BumpPtrAllocator &Allocator; | ||||||||
1085 | |||||||||
1086 | /// MustBeExecutedContextExplorer | ||||||||
1087 | MustBeExecutedContextExplorer Explorer; | ||||||||
1088 | |||||||||
1089 | /// A map with knowledge retained in `llvm.assume` instructions. | ||||||||
1090 | RetainedKnowledgeMap KnowledgeMap; | ||||||||
1091 | |||||||||
1092 | /// Getters for analysis. | ||||||||
1093 | AnalysisGetter &AG; | ||||||||
1094 | |||||||||
1095 | /// The underlying CGSCC, or null if not available. | ||||||||
1096 | SetVector<Function *> *CGSCC; | ||||||||
1097 | |||||||||
1098 | /// Set of inlineable functions | ||||||||
1099 | SmallPtrSet<const Function *, 8> InlineableFunctions; | ||||||||
1100 | |||||||||
1101 | /// A map for caching results of queries for isPotentiallyReachable | ||||||||
1102 | DenseMap<std::pair<const Instruction *, const Instruction *>, bool> | ||||||||
1103 | PotentiallyReachableMap; | ||||||||
1104 | |||||||||
1105 | /// The triple describing the target machine. | ||||||||
1106 | Triple TargetTriple; | ||||||||
1107 | |||||||||
1108 | /// Give the Attributor access to the members so | ||||||||
1109 | /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. | ||||||||
1110 | friend struct Attributor; | ||||||||
1111 | }; | ||||||||
1112 | |||||||||
1113 | /// The fixpoint analysis framework that orchestrates the attribute deduction. | ||||||||
1114 | /// | ||||||||
1115 | /// The Attributor provides a general abstract analysis framework (guided | ||||||||
1116 | /// fixpoint iteration) as well as helper functions for the deduction of | ||||||||
1117 | /// (LLVM-IR) attributes. However, also other code properties can be deduced, | ||||||||
1118 | /// propagated, and ultimately manifested through the Attributor framework. This | ||||||||
1119 | /// is particularly useful if these properties interact with attributes and a | ||||||||
1120 | /// co-scheduled deduction allows to improve the solution. Even if not, thus if | ||||||||
1121 | /// attributes/properties are completely isolated, they should use the | ||||||||
1122 | /// Attributor framework to reduce the number of fixpoint iteration frameworks | ||||||||
1123 | /// in the code base. Note that the Attributor design makes sure that isolated | ||||||||
1124 | /// attributes are not impacted, in any way, by others derived at the same time | ||||||||
1125 | /// if there is no cross-reasoning performed. | ||||||||
1126 | /// | ||||||||
/// The public facing interface of the Attributor is kept simple and basically
/// allows abstract attributes to do one thing: query other abstract
/// attributes in-flight. There are two reasons to do this:
1130 | /// a) The optimistic state of one abstract attribute can justify an | ||||||||
/// optimistic state of another, allowing the framework to end up with an
1132 | /// optimistic (=best possible) fixpoint instead of one based solely on | ||||||||
1133 | /// information in the IR. | ||||||||
1134 | /// b) This avoids reimplementing various kinds of lookups, e.g., to check | ||||||||
1135 | /// for existing IR attributes, in favor of a single lookups interface | ||||||||
1136 | /// provided by an abstract attribute subclass. | ||||||||
1137 | /// | ||||||||
1138 | /// NOTE: The mechanics of adding a new "concrete" abstract attribute are | ||||||||
1139 | /// described in the file comment. | ||||||||
1140 | struct Attributor { | ||||||||
1141 | |||||||||
1142 | using OptimizationRemarkGetter = | ||||||||
1143 | function_ref<OptimizationRemarkEmitter &(Function *)>; | ||||||||
1144 | |||||||||
  /// Constructor
  ///
  /// This overload configures no fixpoint-iteration limit, no optimization
  /// remark emission, and an empty pass name; use the other constructor to
  /// set those explicitly.
  ///
  /// \param Functions The set of functions we are deriving attributes for.
  /// \param InfoCache Cache to hold various information accessible for
  ///                  the abstract attributes.
  /// \param CGUpdater Helper to update an underlying call graph.
  /// \param Allowed If not null, a set limiting the attribute opportunities.
  /// \param DeleteFns Whether to delete functions.
  /// \param RewriteSignatures Whether to rewrite function signatures.
  Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
             CallGraphUpdater &CGUpdater,
             DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
             bool RewriteSignatures = true)
      : Allocator(InfoCache.Allocator), Functions(Functions),
        InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
        DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
        MaxFixpointIterations(None), OREGetter(None), PassName("") {}
1162 | |||||||||
  /// Constructor
  ///
  /// Fully configured variant: allows limiting the number of fixpoint
  /// iterations and hooking up optimization remark emission.
  ///
  /// \param Functions The set of functions we are deriving attributes for.
  /// \param InfoCache Cache to hold various information accessible for
  ///                  the abstract attributes.
  /// \param CGUpdater Helper to update an underlying call graph.
  /// \param Allowed If not null, a set limiting the attribute opportunities.
  /// \param DeleteFns Whether to delete functions.
  /// \param RewriteSignatures Whether to rewrite function signatures.
  /// \param MaxFixpointIterations Maximum number of iterations to run until
  ///                              fixpoint.
  /// \param OREGetter A callback function that returns an ORE object from a
  ///                  Function pointer.
  /// \param PassName The name of the pass emitting remarks.
  Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
             CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
             bool DeleteFns, bool RewriteSignatures,
             Optional<unsigned> MaxFixpointIterations,
             OptimizationRemarkGetter OREGetter, const char *PassName)
      : Allocator(InfoCache.Allocator), Functions(Functions),
        InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
        DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
        MaxFixpointIterations(MaxFixpointIterations),
        OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
        PassName(PassName) {}
1188 | |||||||||
1189 | ~Attributor(); | ||||||||
1190 | |||||||||
1191 | /// Run the analyses until a fixpoint is reached or enforced (timeout). | ||||||||
1192 | /// | ||||||||
1193 | /// The attributes registered with this Attributor can be used after as long | ||||||||
1194 | /// as the Attributor is not destroyed (it owns the attributes now). | ||||||||
1195 | /// | ||||||||
1196 | /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED. | ||||||||
1197 | ChangeStatus run(); | ||||||||
1198 | |||||||||
1199 | /// Lookup an abstract attribute of type \p AAType at position \p IRP. While | ||||||||
1200 | /// no abstract attribute is found equivalent positions are checked, see | ||||||||
1201 | /// SubsumingPositionIterator. Thus, the returned abstract attribute | ||||||||
1202 | /// might be anchored at a different position, e.g., the callee if \p IRP is a | ||||||||
1203 | /// call base. | ||||||||
1204 | /// | ||||||||
1205 | /// This method is the only (supported) way an abstract attribute can retrieve | ||||||||
1206 | /// information from another abstract attribute. As an example, take an | ||||||||
1207 | /// abstract attribute that determines the memory access behavior for a | ||||||||
1208 | /// argument (readnone, readonly, ...). It should use `getAAFor` to get the | ||||||||
1209 | /// most optimistic information for other abstract attributes in-flight, e.g. | ||||||||
1210 | /// the one reasoning about the "captured" state for the argument or the one | ||||||||
1211 | /// reasoning on the memory access behavior of the function as a whole. | ||||||||
1212 | /// | ||||||||
1213 | /// If the DepClass enum is set to `DepClassTy::None` the dependence from | ||||||||
1214 | /// \p QueryingAA to the return abstract attribute is not automatically | ||||||||
1215 | /// recorded. This should only be used if the caller will record the | ||||||||
1216 | /// dependence explicitly if necessary, thus if it the returned abstract | ||||||||
1217 | /// attribute is used for reasoning. To record the dependences explicitly use | ||||||||
1218 | /// the `Attributor::recordDependence` method. | ||||||||
1219 | template <typename AAType> | ||||||||
1220 | const AAType &getAAFor(const AbstractAttribute &QueryingAA, | ||||||||
1221 | const IRPosition &IRP, DepClassTy DepClass) { | ||||||||
1222 | return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass, | ||||||||
1223 | /* ForceUpdate */ false); | ||||||||
1224 | } | ||||||||
1225 | |||||||||
1226 | /// Similar to getAAFor but the return abstract attribute will be updated (via | ||||||||
1227 | /// `AbstractAttribute::update`) even if it is found in the cache. This is | ||||||||
1228 | /// especially useful for AAIsDead as changes in liveness can make updates | ||||||||
1229 | /// possible/useful that were not happening before as the abstract attribute | ||||||||
1230 | /// was assumed dead. | ||||||||
1231 | template <typename AAType> | ||||||||
1232 | const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA, | ||||||||
1233 | const IRPosition &IRP, DepClassTy DepClass) { | ||||||||
1234 | return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass, | ||||||||
1235 | /* ForceUpdate */ true); | ||||||||
1236 | } | ||||||||
1237 | |||||||||
  /// The version of getAAFor that allows to omit a querying abstract
  /// attribute. Using this after the Attributor started running is restricted
  /// to only the Attributor itself. Initial seeding of AAs can be done via
  /// this function.
  /// NOTE: ForceUpdate is ignored in any stage other than the update stage.
  template <typename AAType>
  const AAType &getOrCreateAAFor(IRPosition IRP,
                                 const AbstractAttribute *QueryingAA,
                                 DepClassTy DepClass, bool ForceUpdate = false,
                                 bool UpdateAfterInit = true) {
    // Drop the call-base context from the position unless propagation of such
    // contexts is enabled for it.
    if (!shouldPropagateCallBaseContext(IRP))
      IRP = IRP.stripCallBaseContext();

    // Fast path: the attribute already exists; optionally force an update.
    if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass,
                                            /* AllowInvalidState */ true)) {
      if (ForceUpdate && Phase == AttributorPhase::UPDATE)
        updateAA(*AAPtr);
      return *AAPtr;
    }

    // No matching attribute found, create one.
    // Use the static create method.
    auto &AA = AAType::createForPosition(IRP, *this);

    // If we are currently seeding attributes, enforce seeding rules: anything
    // that should not be seeded is pinned to its pessimistic fixpoint.
    if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) {
      AA.getState().indicatePessimisticFixpoint();
      return AA;
    }

    registerAA(AA);

    // For now we ignore naked and optnone functions.
    bool Invalidate = Allowed && !Allowed->count(&AAType::ID);
    const Function *FnScope = IRP.getAnchorScope();
    if (FnScope)
      Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
                    FnScope->hasFnAttribute(Attribute::OptimizeNone);

    // Avoid too many nested initializations to prevent a stack overflow.
    Invalidate |= InitializationChainLength > MaxInitializationChainLength;

    // Bootstrap the new attribute with an initial update to propagate
    // information, e.g., function -> call site. If the attribute is not in
    // the Allowed set we will not perform updates at all.
    if (Invalidate) {
      AA.getState().indicatePessimisticFixpoint();
      return AA;
    }

    {
      TimeTraceScope TimeScope(AA.getName() + "::initialize");
      // Track nesting depth so deeply recursive initializations bail out via
      // the MaxInitializationChainLength check above.
      ++InitializationChainLength;
      AA.initialize(*this);
      --InitializationChainLength;
    }

    // Initialize and update is allowed for code outside of the current
    // function set, but only if it is part of the module slice we are allowed
    // to look at. The only exception is AAIsDeadFunction whose initialization
    // is prevented directly, since we don't want to compute it twice.
    if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
      if (!getInfoCache().isInModuleSlice(*FnScope)) {
        AA.getState().indicatePessimisticFixpoint();
        return AA;
      }
    }

    // If this is queried in the manifest stage, we force the AA to indicate
    // pessimistic fixpoint immediately.
    if (Phase == AttributorPhase::MANIFEST) {
      AA.getState().indicatePessimisticFixpoint();
      return AA;
    }

    // Allow seeded attributes to declare dependencies by temporarily
    // switching into the update phase; the original phase is restored after.
    if (UpdateAfterInit) {
      AttributorPhase OldPhase = Phase;
      Phase = AttributorPhase::UPDATE;

      updateAA(AA);

      Phase = OldPhase;
    }

    // Record the dependence of the querying attribute on the new one, but
    // only while the new attribute still has a valid (optimistic) state.
    if (QueryingAA && AA.getState().isValidState())
      recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
                       DepClass);
    return AA;
  }
1329 | template <typename AAType> | ||||||||
1330 | const AAType &getOrCreateAAFor(const IRPosition &IRP) { | ||||||||
1331 | return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr, | ||||||||
1332 | DepClassTy::NONE); | ||||||||
1333 | } | ||||||||
1334 | |||||||||
1335 | /// Return the attribute of \p AAType for \p IRP if existing and valid. This | ||||||||
1336 | /// also allows non-AA users lookup. | ||||||||
1337 | template <typename AAType> | ||||||||
1338 | AAType *lookupAAFor(const IRPosition &IRP, | ||||||||
1339 | const AbstractAttribute *QueryingAA = nullptr, | ||||||||
1340 | DepClassTy DepClass = DepClassTy::OPTIONAL, | ||||||||
1341 | bool AllowInvalidState = false) { | ||||||||
1342 | static_assert(std::is_base_of<AbstractAttribute, AAType>::value, | ||||||||
1343 | "Cannot query an attribute with a type not derived from " | ||||||||
1344 | "'AbstractAttribute'!"); | ||||||||
1345 | // Lookup the abstract attribute of type AAType. If found, return it after | ||||||||
1346 | // registering a dependence of QueryingAA on the one returned attribute. | ||||||||
1347 | AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP}); | ||||||||
1348 | if (!AAPtr) | ||||||||
1349 | return nullptr; | ||||||||
1350 | |||||||||
1351 | AAType *AA = static_cast<AAType *>(AAPtr); | ||||||||
1352 | |||||||||
1353 | // Do not register a dependence on an attribute with an invalid state. | ||||||||
1354 | if (DepClass
| ||||||||
1355 | AA->getState().isValidState()) | ||||||||
1356 | recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA), | ||||||||
1357 | DepClass); | ||||||||
1358 | |||||||||
1359 | // Return nullptr if this attribute has an invalid state. | ||||||||
1360 | if (!AllowInvalidState
| ||||||||
1361 | return nullptr; | ||||||||
1362 | return AA; | ||||||||
1363 | } | ||||||||
1364 | |||||||||
1365 | /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if | ||||||||
1366 | /// \p FromAA changes \p ToAA should be updated as well. | ||||||||
1367 | /// | ||||||||
1368 | /// This method should be used in conjunction with the `getAAFor` method and | ||||||||
1369 | /// with the DepClass enum passed to the method set to None. This can | ||||||||
1370 | /// be beneficial to avoid false dependences but it requires the users of | ||||||||
1371 | /// `getAAFor` to explicitly record true dependences through this method. | ||||||||
/// The \p DepClass flag indicates if the dependence is strictly necessary.
1373 | /// That means for required dependences, if \p FromAA changes to an invalid | ||||||||
1374 | /// state, \p ToAA can be moved to a pessimistic fixpoint because it required | ||||||||
1375 | /// information from \p FromAA but none are available anymore. | ||||||||
1376 | void recordDependence(const AbstractAttribute &FromAA, | ||||||||
1377 | const AbstractAttribute &ToAA, DepClassTy DepClass); | ||||||||
1378 | |||||||||
1379 | /// Introduce a new abstract attribute into the fixpoint analysis. | ||||||||
1380 | /// | ||||||||
1381 | /// Note that ownership of the attribute is given to the Attributor. It will | ||||||||
1382 | /// invoke delete for the Attributor on destruction of the Attributor. | ||||||||
1383 | /// | ||||||||
1384 | /// Attributes are identified by their IR position (AAType::getIRPosition()) | ||||||||
1385 | /// and the address of their static member (see AAType::ID). | ||||||||
1386 | template <typename AAType> AAType ®isterAA(AAType &AA) { | ||||||||
1387 | static_assert(std::is_base_of<AbstractAttribute, AAType>::value, | ||||||||
1388 | "Cannot register an attribute with a type not derived from " | ||||||||
1389 | "'AbstractAttribute'!"); | ||||||||
1390 | // Put the attribute in the lookup map structure and the container we use to | ||||||||
1391 | // keep track of all attributes. | ||||||||
1392 | const IRPosition &IRP = AA.getIRPosition(); | ||||||||
1393 | AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}]; | ||||||||
1394 | |||||||||
1395 | assert(!AAPtr && "Attribute already in map!")(static_cast <bool> (!AAPtr && "Attribute already in map!" ) ? void (0) : __assert_fail ("!AAPtr && \"Attribute already in map!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 1395, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1396 | AAPtr = &AA; | ||||||||
1397 | |||||||||
1398 | // Register AA with the synthetic root only before the manifest stage. | ||||||||
1399 | if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE) | ||||||||
1400 | DG.SyntheticRoot.Deps.push_back( | ||||||||
1401 | AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED))); | ||||||||
1402 | |||||||||
1403 | return AA; | ||||||||
1404 | } | ||||||||
1405 | |||||||||
1406 | /// Return the internal information cache. | ||||||||
1407 | InformationCache &getInfoCache() { return InfoCache; } | ||||||||
1408 | |||||||||
1409 | /// Return true if this is a module pass, false otherwise. | ||||||||
1410 | bool isModulePass() const { | ||||||||
1411 | return !Functions.empty() && | ||||||||
1412 | Functions.size() == Functions.front()->getParent()->size(); | ||||||||
1413 | } | ||||||||
1414 | |||||||||
1415 | /// Return true if we derive attributes for \p Fn | ||||||||
1416 | bool isRunOn(Function &Fn) const { | ||||||||
1417 | return Functions.empty() || Functions.count(&Fn); | ||||||||
1418 | } | ||||||||
1419 | |||||||||
1420 | /// Determine opportunities to derive 'default' attributes in \p F and create | ||||||||
1421 | /// abstract attribute objects for them. | ||||||||
1422 | /// | ||||||||
1423 | /// \param F The function that is checked for attribute opportunities. | ||||||||
1424 | /// | ||||||||
1425 | /// Note that abstract attribute instances are generally created even if the | ||||||||
1426 | /// IR already contains the information they would deduce. The most important | ||||||||
1427 | /// reason for this is the single interface, the one of the abstract attribute | ||||||||
1428 | /// instance, which can be queried without the need to look at the IR in | ||||||||
1429 | /// various places. | ||||||||
1430 | void identifyDefaultAbstractAttributes(Function &F); | ||||||||
1431 | |||||||||
1432 | /// Determine whether the function \p F is IPO amendable | ||||||||
1433 | /// | ||||||||
1434 | /// If a function is exactly defined or it has alwaysinline attribute | ||||||||
1435 | /// and is viable to be inlined, we say it is IPO amendable | ||||||||
1436 | bool isFunctionIPOAmendable(const Function &F) { | ||||||||
1437 | return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F); | ||||||||
1438 | } | ||||||||
1439 | |||||||||
1440 | /// Mark the internal function \p F as live. | ||||||||
1441 | /// | ||||||||
1442 | /// This will trigger the identification and initialization of attributes for | ||||||||
1443 | /// \p F. | ||||||||
1444 | void markLiveInternalFunction(const Function &F) { | ||||||||
1445 | assert(F.hasLocalLinkage() &&(static_cast <bool> (F.hasLocalLinkage() && "Only local linkage is assumed dead initially." ) ? void (0) : __assert_fail ("F.hasLocalLinkage() && \"Only local linkage is assumed dead initially.\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 1446, __extension__ __PRETTY_FUNCTION__)) | ||||||||
1446 | "Only local linkage is assumed dead initially.")(static_cast <bool> (F.hasLocalLinkage() && "Only local linkage is assumed dead initially." ) ? void (0) : __assert_fail ("F.hasLocalLinkage() && \"Only local linkage is assumed dead initially.\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 1446, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1447 | |||||||||
1448 | identifyDefaultAbstractAttributes(const_cast<Function &>(F)); | ||||||||
1449 | } | ||||||||
1450 | |||||||||
1451 | /// Helper function to remove callsite. | ||||||||
1452 | void removeCallSite(CallInst *CI) { | ||||||||
1453 | if (!CI) | ||||||||
1454 | return; | ||||||||
1455 | |||||||||
1456 | CGUpdater.removeCallSite(*CI); | ||||||||
1457 | } | ||||||||
1458 | |||||||||
1459 | /// Record that \p U is to be replaces with \p NV after information was | ||||||||
1460 | /// manifested. This also triggers deletion of trivially dead istructions. | ||||||||
1461 | bool changeUseAfterManifest(Use &U, Value &NV) { | ||||||||
1462 | Value *&V = ToBeChangedUses[&U]; | ||||||||
1463 | if (V && (V->stripPointerCasts() == NV.stripPointerCasts() || | ||||||||
1464 | isa_and_nonnull<UndefValue>(V))) | ||||||||
1465 | return false; | ||||||||
1466 | assert((!V || V == &NV || isa<UndefValue>(NV)) &&(static_cast <bool> ((!V || V == &NV || isa<UndefValue >(NV)) && "Use was registered twice for replacement with different values!" ) ? void (0) : __assert_fail ("(!V || V == &NV || isa<UndefValue>(NV)) && \"Use was registered twice for replacement with different values!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 1467, __extension__ __PRETTY_FUNCTION__)) | ||||||||
1467 | "Use was registered twice for replacement with different values!")(static_cast <bool> ((!V || V == &NV || isa<UndefValue >(NV)) && "Use was registered twice for replacement with different values!" ) ? void (0) : __assert_fail ("(!V || V == &NV || isa<UndefValue>(NV)) && \"Use was registered twice for replacement with different values!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 1467, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1468 | V = &NV; | ||||||||
1469 | return true; | ||||||||
1470 | } | ||||||||
1471 | |||||||||
1472 | /// Helper function to replace all uses of \p V with \p NV. Return true if | ||||||||
1473 | /// there is any change. The flag \p ChangeDroppable indicates if dropppable | ||||||||
1474 | /// uses should be changed too. | ||||||||
1475 | bool changeValueAfterManifest(Value &V, Value &NV, | ||||||||
1476 | bool ChangeDroppable = true) { | ||||||||
1477 | auto &Entry = ToBeChangedValues[&V]; | ||||||||
1478 | Value *&CurNV = Entry.first; | ||||||||
1479 | if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() || | ||||||||
1480 | isa<UndefValue>(CurNV))) | ||||||||
1481 | return false; | ||||||||
1482 | assert((!CurNV || CurNV == &NV || isa<UndefValue>(NV)) &&(static_cast <bool> ((!CurNV || CurNV == &NV || isa <UndefValue>(NV)) && "Value replacement was registered twice with different values!" ) ? void (0) : __assert_fail ("(!CurNV || CurNV == &NV || isa<UndefValue>(NV)) && \"Value replacement was registered twice with different values!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 1483, __extension__ __PRETTY_FUNCTION__)) | ||||||||
1483 | "Value replacement was registered twice with different values!")(static_cast <bool> ((!CurNV || CurNV == &NV || isa <UndefValue>(NV)) && "Value replacement was registered twice with different values!" ) ? void (0) : __assert_fail ("(!CurNV || CurNV == &NV || isa<UndefValue>(NV)) && \"Value replacement was registered twice with different values!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 1483, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1484 | CurNV = &NV; | ||||||||
1485 | Entry.second = ChangeDroppable; | ||||||||
1486 | return true; | ||||||||
1487 | } | ||||||||
1488 | |||||||||
  /// Record that \p I is to be replaced with `unreachable` after information
  /// was manifested. The actual IR rewrite happens later, in the cleanup
  /// stage that processes all recorded changes.
  void changeToUnreachableAfterManifest(Instruction *I) {
    ToBeChangedToUnreachableInsts.insert(I);
  }
1494 | |||||||||
  /// Record that \p II has at least one dead successor block. This
  /// information is used, e.g., to replace \p II with a call, after
  /// information was manifested.
  void registerInvokeWithDeadSuccessor(InvokeInst &II) {
    InvokeWithDeadSuccessor.push_back(&II);
  }
1501 | |||||||||
  /// Record that \p I is deleted after information was manifested. This also
  /// triggers deletion of trivially dead instructions.
  void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); }
1505 | |||||||||
  /// Record that \p BB is deleted after information was manifested. This also
  /// triggers deletion of trivially dead instructions.
  void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); }
1509 | |||||||||
  // Record that \p BB is added during the manifest of an AA. Added basic
  // blocks are preserved in the IR (i.e. excluded from later deletion).
  void registerManifestAddedBasicBlock(BasicBlock &BB) {
    ManifestAddedBlocks.insert(&BB);
  }
1515 | |||||||||
1516 | /// Record that \p F is deleted after information was manifested. | ||||||||
1517 | void deleteAfterManifest(Function &F) { | ||||||||
1518 | if (DeleteFns) | ||||||||
1519 | ToBeDeletedFunctions.insert(&F); | ||||||||
1520 | } | ||||||||
1521 | |||||||||
1522 | /// If \p IRP is assumed to be a constant, return it, if it is unclear yet, | ||||||||
1523 | /// return None, otherwise return `nullptr`. | ||||||||
1524 | Optional<Constant *> getAssumedConstant(const IRPosition &IRP, | ||||||||
1525 | const AbstractAttribute &AA, | ||||||||
1526 | bool &UsedAssumedInformation); | ||||||||
1527 | Optional<Constant *> getAssumedConstant(const Value &V, | ||||||||
1528 | const AbstractAttribute &AA, | ||||||||
1529 | bool &UsedAssumedInformation) { | ||||||||
1530 | return getAssumedConstant(IRPosition::value(V), AA, UsedAssumedInformation); | ||||||||
1531 | } | ||||||||
1532 | |||||||||
1533 | /// If \p V is assumed simplified, return it, if it is unclear yet, | ||||||||
1534 | /// return None, otherwise return `nullptr`. | ||||||||
1535 | Optional<Value *> getAssumedSimplified(const IRPosition &IRP, | ||||||||
1536 | const AbstractAttribute &AA, | ||||||||
1537 | bool &UsedAssumedInformation) { | ||||||||
1538 | return getAssumedSimplified(IRP, &AA, UsedAssumedInformation); | ||||||||
1539 | } | ||||||||
1540 | Optional<Value *> getAssumedSimplified(const Value &V, | ||||||||
1541 | const AbstractAttribute &AA, | ||||||||
1542 | bool &UsedAssumedInformation) { | ||||||||
1543 | return getAssumedSimplified(IRPosition::value(V), AA, | ||||||||
1544 | UsedAssumedInformation); | ||||||||
1545 | } | ||||||||
1546 | |||||||||
1547 | /// If \p V is assumed simplified, return it, if it is unclear yet, | ||||||||
1548 | /// return None, otherwise return `nullptr`. Same as the public version | ||||||||
1549 | /// except that it can be used without recording dependences on any \p AA. | ||||||||
1550 | Optional<Value *> getAssumedSimplified(const IRPosition &V, | ||||||||
1551 | const AbstractAttribute *AA, | ||||||||
1552 | bool &UsedAssumedInformation); | ||||||||
1553 | |||||||||
  /// Register \p CB as a simplification callback.
  /// `Attributor::getAssumedSimplified` will use these callbacks before it
  /// asks `AAValueSimplify`. It is important to ensure this is called before
  /// `identifyDefaultAbstractAttributes`, assuming the latter is called at
  /// all.
  using SimplifictionCallbackTy = std::function<Optional<Value *>(
      const IRPosition &, const AbstractAttribute *, bool &)>;
  void registerSimplificationCallback(const IRPosition &IRP,
                                      const SimplifictionCallbackTy &CB) {
    // Callbacks for a position are tried in registration order.
    SimplificationCallbacks[IRP].emplace_back(CB);
  }
1565 | |||||||||
1566 | /// Return true if there is a simplification callback for \p IRP. | ||||||||
1567 | bool hasSimplificationCallback(const IRPosition &IRP) { | ||||||||
1568 | return SimplificationCallbacks.count(IRP); | ||||||||
1569 | } | ||||||||
1570 | |||||||||
private:
  /// All simplification callbacks registered by outside AAs, keyed by the
  /// position they simplify (\see registerSimplificationCallback).
  DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
      SimplificationCallbacks;
1575 | |||||||||
public:
  /// Translate \p V from the callee context into the call site context.
  Optional<Value *>
  translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB,
                                     const AbstractAttribute &AA,
                                     bool &UsedAssumedInformation);

  /// Return true if \p AA (or its context instruction) is assumed dead.
  ///
  /// If \p LivenessAA is not provided it is queried.
  bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA,
                     bool &UsedAssumedInformation,
                     bool CheckBBLivenessOnly = false,
                     DepClassTy DepClass = DepClassTy::OPTIONAL);

  /// Return true if \p I is assumed dead.
  ///
  /// If \p LivenessAA is not provided it is queried.
  bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA,
                     const AAIsDead *LivenessAA, bool &UsedAssumedInformation,
                     bool CheckBBLivenessOnly = false,
                     DepClassTy DepClass = DepClassTy::OPTIONAL);

  /// Return true if \p U is assumed dead.
  ///
  /// If \p FnLivenessAA is not provided it is queried.
  bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA,
                     const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
                     bool CheckBBLivenessOnly = false,
                     DepClassTy DepClass = DepClassTy::OPTIONAL);

  /// Return true if \p IRP is assumed dead.
  ///
  /// If \p FnLivenessAA is not provided it is queried.
  bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA,
                     const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
                     bool CheckBBLivenessOnly = false,
                     DepClassTy DepClass = DepClassTy::OPTIONAL);

  /// Return true if \p BB is assumed dead.
  ///
  /// If \p FnLivenessAA is not provided it is queried.
  bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
                     const AAIsDead *FnLivenessAA,
                     DepClassTy DepClass = DepClassTy::OPTIONAL);

  /// Check \p Pred on all (transitive) uses of \p V.
  ///
  /// This method will evaluate \p Pred on all (transitive) uses of the
  /// associated value and return true if \p Pred holds every time.
  /// If uses are skipped in favor of equivalent ones, e.g., if we look through
  /// memory, the \p EquivalentUseCB will be used to give the caller an idea
  /// what original use was replaced by a new one (or new ones). The visit is
  /// cut short if \p EquivalentUseCB returns false and the function will return
  /// false as well.
  bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
                       const AbstractAttribute &QueryingAA, const Value &V,
                       bool CheckBBLivenessOnly = false,
                       DepClassTy LivenessDepClass = DepClassTy::OPTIONAL,
                       function_ref<bool(const Use &OldU, const Use &NewU)>
                           EquivalentUseCB = nullptr);
1638 | /// Emit a remark generically. | ||||||||
1639 | /// | ||||||||
1640 | /// This template function can be used to generically emit a remark. The | ||||||||
1641 | /// RemarkKind should be one of the following: | ||||||||
1642 | /// - OptimizationRemark to indicate a successful optimization attempt | ||||||||
1643 | /// - OptimizationRemarkMissed to report a failed optimization attempt | ||||||||
1644 | /// - OptimizationRemarkAnalysis to provide additional information about an | ||||||||
1645 | /// optimization attempt | ||||||||
1646 | /// | ||||||||
1647 | /// The remark is built using a callback function \p RemarkCB that takes a | ||||||||
1648 | /// RemarkKind as input and returns a RemarkKind. | ||||||||
1649 | template <typename RemarkKind, typename RemarkCallBack> | ||||||||
1650 | void emitRemark(Instruction *I, StringRef RemarkName, | ||||||||
1651 | RemarkCallBack &&RemarkCB) const { | ||||||||
1652 | if (!OREGetter) | ||||||||
1653 | return; | ||||||||
1654 | |||||||||
1655 | Function *F = I->getFunction(); | ||||||||
1656 | auto &ORE = OREGetter.getValue()(F); | ||||||||
1657 | |||||||||
1658 | if (RemarkName.startswith("OMP")) | ||||||||
1659 | ORE.emit([&]() { | ||||||||
1660 | return RemarkCB(RemarkKind(PassName, RemarkName, I)) | ||||||||
1661 | << " [" << RemarkName << "]"; | ||||||||
1662 | }); | ||||||||
1663 | else | ||||||||
1664 | ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); }); | ||||||||
1665 | } | ||||||||
1666 | |||||||||
1667 | /// Emit a remark on a function. | ||||||||
1668 | template <typename RemarkKind, typename RemarkCallBack> | ||||||||
1669 | void emitRemark(Function *F, StringRef RemarkName, | ||||||||
1670 | RemarkCallBack &&RemarkCB) const { | ||||||||
1671 | if (!OREGetter) | ||||||||
1672 | return; | ||||||||
1673 | |||||||||
1674 | auto &ORE = OREGetter.getValue()(F); | ||||||||
1675 | |||||||||
1676 | if (RemarkName.startswith("OMP")) | ||||||||
1677 | ORE.emit([&]() { | ||||||||
1678 | return RemarkCB(RemarkKind(PassName, RemarkName, F)) | ||||||||
1679 | << " [" << RemarkName << "]"; | ||||||||
1680 | }); | ||||||||
1681 | else | ||||||||
1682 | ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); }); | ||||||||
1683 | } | ||||||||
1684 | |||||||||
  /// Helper struct used in the communication between an abstract attribute (AA)
  /// that wants to change the signature of a function and the Attributor which
  /// applies the changes. The struct is partially initialized with the
  /// information from the AA (see the constructor). All other members are
  /// provided by the Attributor prior to invoking any callbacks.
  struct ArgumentReplacementInfo {
    /// Callee repair callback type
    ///
    /// The function repair callback is invoked once to rewire the replacement
    /// arguments in the body of the new function. The argument replacement info
    /// is passed, as built from the registerFunctionSignatureRewrite call, as
    /// well as the replacement function and an iterator to the first
    /// replacement argument.
    using CalleeRepairCBTy = std::function<void(
        const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>;

    /// Abstract call site (ACS) repair callback type
    ///
    /// The abstract call site repair callback is invoked once on every abstract
    /// call site of the replaced function (\see ReplacedFn). The callback needs
    /// to provide the operands for the call to the new replacement function.
    /// The number and type of the operands appended to the provided vector
    /// (second argument) is defined by the number and types determined through
    /// the replacement type vector (\see ReplacementTypes). The first argument
    /// is the ArgumentReplacementInfo object registered with the Attributor
    /// through the registerFunctionSignatureRewrite call.
    using ACSRepairCBTy =
        std::function<void(const ArgumentReplacementInfo &, AbstractCallSite,
                           SmallVectorImpl<Value *> &)>;

    /// Simple getters, see the corresponding members for details.
    ///{

    Attributor &getAttributor() const { return A; }
    const Function &getReplacedFn() const { return ReplacedFn; }
    const Argument &getReplacedArg() const { return ReplacedArg; }
    unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); }
    const SmallVectorImpl<Type *> &getReplacementTypes() const {
      return ReplacementTypes;
    }

    ///}

  private:
    /// Constructor that takes the argument to be replaced, the types of
    /// the replacement arguments, as well as callbacks to repair the call sites
    /// and new function after the replacement happened.
    ArgumentReplacementInfo(Attributor &A, Argument &Arg,
                            ArrayRef<Type *> ReplacementTypes,
                            CalleeRepairCBTy &&CalleeRepairCB,
                            ACSRepairCBTy &&ACSRepairCB)
        : A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg),
          ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()),
          CalleeRepairCB(std::move(CalleeRepairCB)),
          ACSRepairCB(std::move(ACSRepairCB)) {}

    /// Reference to the attributor to allow access from the callbacks.
    Attributor &A;

    /// The "old" function replaced by ReplacementFn.
    const Function &ReplacedFn;

    /// The "old" argument replaced by new ones defined via ReplacementTypes.
    const Argument &ReplacedArg;

    /// The types of the arguments replacing ReplacedArg.
    const SmallVector<Type *, 8> ReplacementTypes;

    /// Callee repair callback, see CalleeRepairCBTy.
    const CalleeRepairCBTy CalleeRepairCB;

    /// Abstract call site (ACS) repair callback, see ACSRepairCBTy.
    const ACSRepairCBTy ACSRepairCB;

    /// Allow access to the private members from the Attributor.
    friend struct Attributor;
  };
1762 | |||||||||
  /// Check if we can rewrite a function signature.
  ///
  /// The argument \p Arg is replaced with new ones defined by the number,
  /// order, and types in \p ReplacementTypes.
  ///
  /// \returns True, if the replacement can be registered, via
  /// registerFunctionSignatureRewrite, false otherwise.
  bool isValidFunctionSignatureRewrite(Argument &Arg,
                                       ArrayRef<Type *> ReplacementTypes);

  /// Register a rewrite for a function signature.
  ///
  /// The argument \p Arg is replaced with new ones defined by the number,
  /// order, and types in \p ReplacementTypes. The rewiring at the call sites is
  /// done through \p ACSRepairCB and at the callee site through
  /// \p CalleeRepairCB.
  ///
  /// \returns True, if the replacement was registered, false otherwise.
  bool registerFunctionSignatureRewrite(
      Argument &Arg, ArrayRef<Type *> ReplacementTypes,
      ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
      ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB);

  /// Check \p Pred on all function call sites.
  ///
  /// This method will evaluate \p Pred on call sites and return
  /// true if \p Pred holds in every call site. However, this is only possible
  /// if all call sites are known, hence the function has internal linkage.
  /// If true is returned, \p AllCallSitesKnown is set if all possible call
  /// sites of the function have been visited.
  bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
                            const AbstractAttribute &QueryingAA,
                            bool RequireAllCallSites, bool &AllCallSitesKnown);

  /// Check \p Pred on all values potentially returned by \p F.
  ///
  /// This method will evaluate \p Pred on all values potentially returned by
  /// the function associated with \p QueryingAA. The returned values are
  /// matched with their respective return instructions. Returns true if \p Pred
  /// holds on all of them.
  bool checkForAllReturnedValuesAndReturnInsts(
      function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
      const AbstractAttribute &QueryingAA);

  /// Check \p Pred on all values potentially returned by the function
  /// associated with \p QueryingAA.
  ///
  /// This is the context insensitive version of the method above.
  bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
                                 const AbstractAttribute &QueryingAA);

  /// Check \p Pred on all instructions with an opcode present in \p Opcodes.
  ///
  /// This method will evaluate \p Pred on all instructions with an opcode
  /// present in \p Opcodes and return true if \p Pred holds on all of them.
  bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
                               const AbstractAttribute &QueryingAA,
                               const ArrayRef<unsigned> &Opcodes,
                               bool &UsedAssumedInformation,
                               bool CheckBBLivenessOnly = false,
                               bool CheckPotentiallyDead = false);
1824 | |||||||||
1825 | /// Check \p Pred on all call-like instructions (=CallBased derived). | ||||||||
1826 | /// | ||||||||
1827 | /// See checkForAllCallLikeInstructions(...) for more information. | ||||||||
1828 | bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred, | ||||||||
1829 | const AbstractAttribute &QueryingAA, | ||||||||
1830 | bool &UsedAssumedInformation, | ||||||||
1831 | bool CheckBBLivenessOnly = false, | ||||||||
1832 | bool CheckPotentiallyDead = false) { | ||||||||
1833 | return checkForAllInstructions( | ||||||||
1834 | Pred, QueryingAA, | ||||||||
1835 | {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, | ||||||||
1836 | (unsigned)Instruction::Call}, | ||||||||
1837 | UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead); | ||||||||
1838 | } | ||||||||
1839 | |||||||||
  /// Check \p Pred on all Read/Write instructions.
  ///
  /// This method will evaluate \p Pred on all instructions that read or write
  /// memory, as recorded in the information cache, and return true if \p Pred
  /// holds on all of them.
  bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred,
                                        AbstractAttribute &QueryingAA,
                                        bool &UsedAssumedInformation);
1848 | |||||||||
  /// Create a shallow wrapper for \p F such that \p F has internal linkage
  /// afterwards. It also sets the original \p F 's name to anonymous.
  ///
  /// A wrapper is a function with the same type (and attributes) as \p F
  /// that will only call \p F and return the result, if any.
  ///
  /// Assuming the declaration of \p F looks like:
  ///   rty F(aty0 arg0, ..., atyN argN);
  ///
  /// The wrapper will then look as follows:
  ///   rty wrapper(aty0 arg0, ..., atyN argN) {
  ///     return F(arg0, ..., argN);
  ///   }
  ///
  static void createShallowWrapper(Function &F);

  /// Returns true if the function \p F can be internalized. i.e. it has a
  /// compatible linkage.
  static bool isInternalizable(Function &F);

  /// Make another copy of the function \p F such that the copied version has
  /// internal linkage afterwards and can be analysed. Then we replace all uses
  /// of the original function to the copied one.
  ///
  /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
  /// linkage can be internalized because these linkages guarantee that other
  /// definitions with the same name have the same semantics as this one.
  ///
  /// This will only be run if the `attributor-allow-deep-wrappers` option is
  /// set, or if the function is called with \p Force set to true.
  ///
  /// If the function \p F failed to be internalized the return value will be a
  /// null pointer.
  static Function *internalizeFunction(Function &F, bool Force = false);

  /// Make copies of each function in the set \p FnSet such that the copied
  /// version has internal linkage afterwards and can be analysed. Then we
  /// replace all uses of the original function to the copied one. The map
  /// \p FnMap contains a mapping of functions to their internalized versions.
  ///
  /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
  /// linkage can be internalized because these linkages guarantee that other
  /// definitions with the same name have the same semantics as this one.
  ///
  /// This version will internalize all the functions in the set \p FnSet at
  /// once and then replace the uses. This prevents internalized functions being
  /// called by external functions when there is an internalized version in the
  /// module.
  static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
                                   DenseMap<Function *, Function *> &FnMap);
1899 | |||||||||
  /// Return the data layout associated with the anchor scope. The layout
  /// itself is owned by the information cache (\see InfoCache).
  const DataLayout &getDataLayout() const { return InfoCache.DL; }

  /// The allocator used to allocate memory, e.g. for `AbstractAttribute`s.
  BumpPtrAllocator &Allocator;
1905 | |||||||||
private:
  /// This method will do fixpoint iteration until fixpoint or the
  /// maximum iteration count is reached.
  ///
  /// If the maximum iteration count is reached, this method will
  /// indicate pessimistic fixpoint on attributes that transitively depend
  /// on attributes that were scheduled for an update.
  void runTillFixpoint();

  /// Gets called after scheduling, manifests attributes to the LLVM IR.
  ChangeStatus manifestAttributes();

  /// Gets called after attributes have been manifested, cleans up the IR.
  /// Deletes dead functions, blocks and instructions.
  /// Rewrites function signatures and updates the call graph.
  ChangeStatus cleanupIR();

  /// Identify internal functions that are effectively dead, thus not reachable
  /// from a live entry point. The functions are added to ToBeDeletedFunctions.
  void identifyDeadInternalFunctions();

  /// Run `::update` on \p AA and track the dependences queried while doing so.
  /// Also adjust the state if we know further updates are not necessary.
  ChangeStatus updateAA(AbstractAttribute &AA);

  /// Remember the dependences on the top of the dependence stack such that they
  /// may trigger further updates. (\see DependenceStack)
  void rememberDependences();

  /// Check \p Pred on all call sites of \p Fn.
  ///
  /// This method will evaluate \p Pred on call sites and return
  /// true if \p Pred holds in every call site. However, this is only possible
  /// if all call sites are known, hence the function has internal linkage.
  /// If true is returned, \p AllCallSitesKnown is set if all possible call
  /// sites of the function have been visited.
  bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
                            const Function &Fn, bool RequireAllCallSites,
                            const AbstractAttribute *QueryingAA,
                            bool &AllCallSitesKnown);
1946 | |||||||||
  /// Determine if CallBase context in \p IRP should be propagated.
  bool shouldPropagateCallBaseContext(const IRPosition &IRP);

  /// Apply all requested function signature rewrites
  /// (\see registerFunctionSignatureRewrite) and return Changed if the module
  /// was altered.
  ChangeStatus
  rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns);

  /// Check if the attribute \p AA should be seeded.
  /// See getOrCreateAAFor.
  bool shouldSeedAttribute(AbstractAttribute &AA);

  /// A nested map to lookup abstract attributes based on the argument position
  /// on the outer level, and the addresses of the static member (AAType::ID) on
  /// the inner level.
  ///{
  using AAMapKeyTy = std::pair<const char *, IRPosition>;
  DenseMap<AAMapKeyTy, AbstractAttribute *> AAMap;
  ///}

  /// Map to remember all requested signature changes (= argument replacements).
  DenseMap<Function *, SmallVector<std::unique_ptr<ArgumentReplacementInfo>, 8>>
      ArgumentReplacementMap;

  /// The set of functions we are deriving attributes for.
  SetVector<Function *> &Functions;

  /// The information cache that holds pre-processed (LLVM-IR) information.
  InformationCache &InfoCache;

  /// Helper to update an underlying call graph.
  CallGraphUpdater &CGUpdater;

  /// Abstract attribute dependency graph.
  AADepGraph DG;

  /// Set of functions for which we modified the content such that it might
  /// impact the call graph.
  SmallPtrSet<Function *, 8> CGModifiedFunctions;

  /// Information about a dependence. If FromAA is changed ToAA needs to be
  /// updated as well.
  struct DepInfo {
    const AbstractAttribute *FromAA;
    const AbstractAttribute *ToAA;
    DepClassTy DepClass;
  };
1995 | |||||||||
  /// The dependence stack is used to track dependences during an
  /// `AbstractAttribute::update` call. As `AbstractAttribute::update` can be
  /// recursive we might have multiple vectors of dependences in here. The stack
  /// size should be adjusted according to the expected recursion depth and the
  /// inner dependence vector size to the expected number of dependences per
  /// abstract attribute. Since the inner vectors are actually allocated on the
  /// stack we can be generous with their size.
  using DependenceVector = SmallVector<DepInfo, 8>;
  SmallVector<DependenceVector *, 16> DependenceStack;

  /// If not null, a set limiting the attribute opportunities.
  const DenseSet<const char *> *Allowed;

  /// Whether to delete functions.
  const bool DeleteFns;

  /// Whether to rewrite signatures.
  const bool RewriteSignatures;

  /// Maximum number of fixpoint iterations.
  Optional<unsigned> MaxFixpointIterations;

  /// A set to remember the functions we already assume to be live and visited.
  DenseSet<const Function *> VisitedFunctions;

  /// Uses we replace with a new value after manifest is done. We will then
  /// remove trivially dead instructions as well.
  DenseMap<Use *, Value *> ToBeChangedUses;

  /// Values we replace with a new value after manifest is done. We will then
  /// remove trivially dead instructions as well.
  DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues;

  /// Instructions we replace with `unreachable` insts after manifest is done.
  SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;

  /// Invoke instructions with at least a single dead successor block.
  SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;

  /// A flag that indicates which stage of the process we are in. Initially, the
  /// phase is SEEDING. Phase is changed in `Attributor::run()`.
  enum class AttributorPhase {
    SEEDING,
    UPDATE,
    MANIFEST,
    CLEANUP,
  } Phase = AttributorPhase::SEEDING;

  /// The current initialization chain length. Tracked to avoid stack overflows.
  unsigned InitializationChainLength = 0;

  /// Functions, blocks, and instructions we delete after manifest is done.
  ///
  ///{
  SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
  SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
  SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks;
  SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
  ///}

  /// Callback to get an OptimizationRemarkEmitter from a Function *.
  Optional<OptimizationRemarkGetter> OREGetter;

  /// The name of the pass to emit remarks for.
  const char *PassName = "";

  friend AADepGraph;
  friend AttributorCallGraph;
};
2065 | |||||||||
2066 | /// An interface to query the internal state of an abstract attribute. | ||||||||
2067 | /// | ||||||||
2068 | /// The abstract state is a minimal interface that allows the Attributor to | ||||||||
2069 | /// communicate with the abstract attributes about their internal state without | ||||||||
2070 | /// enforcing or exposing implementation details, e.g., the (existence of an) | ||||||||
2071 | /// underlying lattice. | ||||||||
2072 | /// | ||||||||
2073 | /// It is sufficient to be able to query if a state is (1) valid or invalid, (2) | ||||||||
2074 | /// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint | ||||||||
2075 | /// was reached or (4) a pessimistic fixpoint was enforced. | ||||||||
2076 | /// | ||||||||
2077 | /// All methods need to be implemented by the subclass. For the common use case, | ||||||||
2078 | /// a single boolean state or a bit-encoded state, the BooleanState and | ||||||||
2079 | /// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract | ||||||||
2080 | /// attribute can inherit from them to get the abstract state interface and | ||||||||
2081 | /// additional methods to directly modify the state based if needed. See the | ||||||||
2082 | /// class comments for help. | ||||||||
2083 | struct AbstractState { | ||||||||
2084 | virtual ~AbstractState() {} | ||||||||
2085 | |||||||||
2086 | /// Return if this abstract state is in a valid state. If false, no | ||||||||
2087 | /// information provided should be used. | ||||||||
2088 | virtual bool isValidState() const = 0; | ||||||||
2089 | |||||||||
2090 | /// Return if this abstract state is fixed, thus does not need to be updated | ||||||||
2091 | /// if information changes as it cannot change itself. | ||||||||
2092 | virtual bool isAtFixpoint() const = 0; | ||||||||
2093 | |||||||||
2094 | /// Indicate that the abstract state should converge to the optimistic state. | ||||||||
2095 | /// | ||||||||
2096 | /// This will usually make the optimistically assumed state the known to be | ||||||||
2097 | /// true state. | ||||||||
2098 | /// | ||||||||
2099 | /// \returns ChangeStatus::UNCHANGED as the assumed value should not change. | ||||||||
2100 | virtual ChangeStatus indicateOptimisticFixpoint() = 0; | ||||||||
2101 | |||||||||
2102 | /// Indicate that the abstract state should converge to the pessimistic state. | ||||||||
2103 | /// | ||||||||
2104 | /// This will usually revert the optimistically assumed state to the known to | ||||||||
2105 | /// be true state. | ||||||||
2106 | /// | ||||||||
2107 | /// \returns ChangeStatus::CHANGED as the assumed value may change. | ||||||||
2108 | virtual ChangeStatus indicatePessimisticFixpoint() = 0; | ||||||||
2109 | }; | ||||||||
2110 | |||||||||
/// Simple state with integers encoding.
///
/// The interface ensures that the assumed bits are always a subset of the known
/// bits. Users can only add known bits and, except through adding known bits,
/// they can only remove assumed bits. This should guarantee monotonicity and
/// thereby the existence of a fixpoint (if used correctly). The fixpoint is
/// reached when the assumed and known state/bits are equal. Users can
/// force/indicate a fixpoint. If an optimistic one is indicated, the known
/// state will catch up with the assumed one, for a pessimistic fixpoint it is
/// the other way around.
template <typename base_ty, base_ty BestState, base_ty WorstState>
struct IntegerStateBase : public AbstractState {
  using base_t = base_ty;

  IntegerStateBase() {}
  IntegerStateBase(base_t Assumed) : Assumed(Assumed) {}

  /// Return the best possible representable state.
  static constexpr base_t getBestState() { return BestState; }
  /// Convenience overload that ignores the concrete state instance.
  static constexpr base_t getBestState(const IntegerStateBase &) {
    return getBestState();
  }

  /// Return the worst possible representable state.
  static constexpr base_t getWorstState() { return WorstState; }
  /// Convenience overload that ignores the concrete state instance.
  static constexpr base_t getWorstState(const IntegerStateBase &) {
    return getWorstState();
  }

  /// See AbstractState::isValidState()
  /// NOTE: For now we simply pretend that the worst possible state is invalid.
  bool isValidState() const override { return Assumed != getWorstState(); }

  /// See AbstractState::isAtFixpoint()
  bool isAtFixpoint() const override { return Assumed == Known; }

  /// See AbstractState::indicateOptimisticFixpoint(...)
  ChangeStatus indicateOptimisticFixpoint() override {
    // The known state catches up with the assumed one; the assumed state does
    // not change, hence UNCHANGED.
    Known = Assumed;
    return ChangeStatus::UNCHANGED;
  }

  /// See AbstractState::indicatePessimisticFixpoint(...)
  ChangeStatus indicatePessimisticFixpoint() override {
    // The assumed state is reverted to the known one, potentially dropping
    // optimistic information, hence CHANGED.
    Assumed = Known;
    return ChangeStatus::CHANGED;
  }

  /// Return the known state encoding
  base_t getKnown() const { return Known; }

  /// Return the assumed state encoding.
  base_t getAssumed() const { return Assumed; }

  /// Equality for IntegerStateBase.
  bool
  operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
    return this->getAssumed() == R.getAssumed() &&
           this->getKnown() == R.getKnown();
  }

  /// Inequality for IntegerStateBase.
  bool
  operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
    return !(*this == R);
  }

  /// "Clamp" this state with \p R. The result is subtype dependent but it is
  /// intended that only information assumed in both states will be assumed in
  /// this one afterwards.
  void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
    handleNewAssumedValue(R.getAssumed());
  }

  /// "Clamp" this state with \p R. The result is subtype dependent but it is
  /// intended that information known in either state will be known in
  /// this one afterwards.
  void operator+=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
    handleNewKnownValue(R.getKnown());
  }

  /// Merge \p R into this state; the exact semantics are subtype dependent,
  /// see the corresponding joinOR implementation.
  void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
    joinOR(R.getAssumed(), R.getKnown());
  }

  /// Merge \p R into this state; the exact semantics are subtype dependent,
  /// see the corresponding joinAND implementation.
  void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
    joinAND(R.getAssumed(), R.getKnown());
  }

protected:
  /// Handle a new assumed value \p Value. Subtype dependent.
  virtual void handleNewAssumedValue(base_t Value) = 0;

  /// Handle a new known value \p Value. Subtype dependent.
  virtual void handleNewKnownValue(base_t Value) = 0;

  /// Handle a value \p Value. Subtype dependent.
  virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0;

  /// Handle a new assumed value \p Value. Subtype dependent.
  virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0;

  /// The known state encoding in an integer of type base_t.
  base_t Known = getWorstState();

  /// The assumed state encoding in an integer of type base_t.
  base_t Assumed = getBestState();
};
2219 | |||||||||
2220 | /// Specialization of the integer state for a bit-wise encoding. | ||||||||
2221 | template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0), | ||||||||
2222 | base_ty WorstState = 0> | ||||||||
2223 | struct BitIntegerState | ||||||||
2224 | : public IntegerStateBase<base_ty, BestState, WorstState> { | ||||||||
2225 | using base_t = base_ty; | ||||||||
2226 | |||||||||
2227 | /// Return true if the bits set in \p BitsEncoding are "known bits". | ||||||||
2228 | bool isKnown(base_t BitsEncoding) const { | ||||||||
2229 | return (this->Known & BitsEncoding) == BitsEncoding; | ||||||||
2230 | } | ||||||||
2231 | |||||||||
2232 | /// Return true if the bits set in \p BitsEncoding are "assumed bits". | ||||||||
2233 | bool isAssumed(base_t BitsEncoding) const { | ||||||||
2234 | return (this->Assumed & BitsEncoding) == BitsEncoding; | ||||||||
2235 | } | ||||||||
2236 | |||||||||
2237 | /// Add the bits in \p BitsEncoding to the "known bits". | ||||||||
2238 | BitIntegerState &addKnownBits(base_t Bits) { | ||||||||
2239 | // Make sure we never miss any "known bits". | ||||||||
2240 | this->Assumed |= Bits; | ||||||||
2241 | this->Known |= Bits; | ||||||||
2242 | return *this; | ||||||||
2243 | } | ||||||||
2244 | |||||||||
2245 | /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known. | ||||||||
2246 | BitIntegerState &removeAssumedBits(base_t BitsEncoding) { | ||||||||
2247 | return intersectAssumedBits(~BitsEncoding); | ||||||||
2248 | } | ||||||||
2249 | |||||||||
2250 | /// Remove the bits in \p BitsEncoding from the "known bits". | ||||||||
2251 | BitIntegerState &removeKnownBits(base_t BitsEncoding) { | ||||||||
2252 | this->Known = (this->Known & ~BitsEncoding); | ||||||||
2253 | return *this; | ||||||||
2254 | } | ||||||||
2255 | |||||||||
2256 | /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones. | ||||||||
2257 | BitIntegerState &intersectAssumedBits(base_t BitsEncoding) { | ||||||||
2258 | // Make sure we never loose any "known bits". | ||||||||
2259 | this->Assumed = (this->Assumed & BitsEncoding) | this->Known; | ||||||||
2260 | return *this; | ||||||||
2261 | } | ||||||||
2262 | |||||||||
2263 | private: | ||||||||
2264 | void handleNewAssumedValue(base_t Value) override { | ||||||||
2265 | intersectAssumedBits(Value); | ||||||||
2266 | } | ||||||||
2267 | void handleNewKnownValue(base_t Value) override { addKnownBits(Value); } | ||||||||
2268 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2269 | this->Known |= KnownValue; | ||||||||
2270 | this->Assumed |= AssumedValue; | ||||||||
2271 | } | ||||||||
2272 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2273 | this->Known &= KnownValue; | ||||||||
2274 | this->Assumed &= AssumedValue; | ||||||||
2275 | } | ||||||||
2276 | }; | ||||||||
2277 | |||||||||
2278 | /// Specialization of the integer state for an increasing value, hence ~0u is | ||||||||
2279 | /// the best state and 0 the worst. | ||||||||
2280 | template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0), | ||||||||
2281 | base_ty WorstState = 0> | ||||||||
2282 | struct IncIntegerState | ||||||||
2283 | : public IntegerStateBase<base_ty, BestState, WorstState> { | ||||||||
2284 | using super = IntegerStateBase<base_ty, BestState, WorstState>; | ||||||||
2285 | using base_t = base_ty; | ||||||||
2286 | |||||||||
2287 | IncIntegerState() : super() {} | ||||||||
2288 | IncIntegerState(base_t Assumed) : super(Assumed) {} | ||||||||
2289 | |||||||||
2290 | /// Return the best possible representable state. | ||||||||
2291 | static constexpr base_t getBestState() { return BestState; } | ||||||||
2292 | static constexpr base_t | ||||||||
2293 | getBestState(const IncIntegerState<base_ty, BestState, WorstState> &) { | ||||||||
2294 | return getBestState(); | ||||||||
2295 | } | ||||||||
2296 | |||||||||
2297 | /// Take minimum of assumed and \p Value. | ||||||||
2298 | IncIntegerState &takeAssumedMinimum(base_t Value) { | ||||||||
2299 | // Make sure we never loose "known value". | ||||||||
2300 | this->Assumed = std::max(std::min(this->Assumed, Value), this->Known); | ||||||||
2301 | return *this; | ||||||||
2302 | } | ||||||||
2303 | |||||||||
2304 | /// Take maximum of known and \p Value. | ||||||||
2305 | IncIntegerState &takeKnownMaximum(base_t Value) { | ||||||||
2306 | // Make sure we never loose "known value". | ||||||||
2307 | this->Assumed = std::max(Value, this->Assumed); | ||||||||
2308 | this->Known = std::max(Value, this->Known); | ||||||||
2309 | return *this; | ||||||||
2310 | } | ||||||||
2311 | |||||||||
2312 | private: | ||||||||
2313 | void handleNewAssumedValue(base_t Value) override { | ||||||||
2314 | takeAssumedMinimum(Value); | ||||||||
2315 | } | ||||||||
2316 | void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); } | ||||||||
2317 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2318 | this->Known = std::max(this->Known, KnownValue); | ||||||||
2319 | this->Assumed = std::max(this->Assumed, AssumedValue); | ||||||||
2320 | } | ||||||||
2321 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2322 | this->Known = std::min(this->Known, KnownValue); | ||||||||
2323 | this->Assumed = std::min(this->Assumed, AssumedValue); | ||||||||
2324 | } | ||||||||
2325 | }; | ||||||||
2326 | |||||||||
2327 | /// Specialization of the integer state for a decreasing value, hence 0 is the | ||||||||
2328 | /// best state and ~0u the worst. | ||||||||
2329 | template <typename base_ty = uint32_t> | ||||||||
2330 | struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> { | ||||||||
2331 | using base_t = base_ty; | ||||||||
2332 | |||||||||
2333 | /// Take maximum of assumed and \p Value. | ||||||||
2334 | DecIntegerState &takeAssumedMaximum(base_t Value) { | ||||||||
2335 | // Make sure we never loose "known value". | ||||||||
2336 | this->Assumed = std::min(std::max(this->Assumed, Value), this->Known); | ||||||||
2337 | return *this; | ||||||||
2338 | } | ||||||||
2339 | |||||||||
2340 | /// Take minimum of known and \p Value. | ||||||||
2341 | DecIntegerState &takeKnownMinimum(base_t Value) { | ||||||||
2342 | // Make sure we never loose "known value". | ||||||||
2343 | this->Assumed = std::min(Value, this->Assumed); | ||||||||
2344 | this->Known = std::min(Value, this->Known); | ||||||||
2345 | return *this; | ||||||||
2346 | } | ||||||||
2347 | |||||||||
2348 | private: | ||||||||
2349 | void handleNewAssumedValue(base_t Value) override { | ||||||||
2350 | takeAssumedMaximum(Value); | ||||||||
2351 | } | ||||||||
2352 | void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); } | ||||||||
2353 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2354 | this->Assumed = std::min(this->Assumed, KnownValue); | ||||||||
2355 | this->Assumed = std::min(this->Assumed, AssumedValue); | ||||||||
2356 | } | ||||||||
2357 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2358 | this->Assumed = std::max(this->Assumed, KnownValue); | ||||||||
2359 | this->Assumed = std::max(this->Assumed, AssumedValue); | ||||||||
2360 | } | ||||||||
2361 | }; | ||||||||
2362 | |||||||||
2363 | /// Simple wrapper for a single bit (boolean) state. | ||||||||
2364 | struct BooleanState : public IntegerStateBase<bool, true, false> { | ||||||||
2365 | using super = IntegerStateBase<bool, true, false>; | ||||||||
2366 | using base_t = IntegerStateBase::base_t; | ||||||||
2367 | |||||||||
2368 | BooleanState() : super() {} | ||||||||
2369 | BooleanState(base_t Assumed) : super(Assumed) {} | ||||||||
2370 | |||||||||
2371 | /// Set the assumed value to \p Value but never below the known one. | ||||||||
2372 | void setAssumed(bool Value) { Assumed &= (Known | Value); } | ||||||||
2373 | |||||||||
2374 | /// Set the known and asssumed value to \p Value. | ||||||||
2375 | void setKnown(bool Value) { | ||||||||
2376 | Known |= Value; | ||||||||
2377 | Assumed |= Value; | ||||||||
2378 | } | ||||||||
2379 | |||||||||
2380 | /// Return true if the state is assumed to hold. | ||||||||
2381 | bool isAssumed() const { return getAssumed(); } | ||||||||
2382 | |||||||||
2383 | /// Return true if the state is known to hold. | ||||||||
2384 | bool isKnown() const { return getKnown(); } | ||||||||
2385 | |||||||||
2386 | private: | ||||||||
2387 | void handleNewAssumedValue(base_t Value) override { | ||||||||
2388 | if (!Value) | ||||||||
2389 | Assumed = Known; | ||||||||
2390 | } | ||||||||
2391 | void handleNewKnownValue(base_t Value) override { | ||||||||
2392 | if (Value) | ||||||||
2393 | Known = (Assumed = Value); | ||||||||
2394 | } | ||||||||
2395 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2396 | Known |= KnownValue; | ||||||||
2397 | Assumed |= AssumedValue; | ||||||||
2398 | } | ||||||||
2399 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
2400 | Known &= KnownValue; | ||||||||
2401 | Assumed &= AssumedValue; | ||||||||
2402 | } | ||||||||
2403 | }; | ||||||||
2404 | |||||||||
2405 | /// State for an integer range. | ||||||||
2406 | struct IntegerRangeState : public AbstractState { | ||||||||
2407 | |||||||||
2408 | /// Bitwidth of the associated value. | ||||||||
2409 | uint32_t BitWidth; | ||||||||
2410 | |||||||||
2411 | /// State representing assumed range, initially set to empty. | ||||||||
2412 | ConstantRange Assumed; | ||||||||
2413 | |||||||||
2414 | /// State representing known range, initially set to [-inf, inf]. | ||||||||
2415 | ConstantRange Known; | ||||||||
2416 | |||||||||
2417 | IntegerRangeState(uint32_t BitWidth) | ||||||||
2418 | : BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)), | ||||||||
2419 | Known(ConstantRange::getFull(BitWidth)) {} | ||||||||
2420 | |||||||||
2421 | IntegerRangeState(const ConstantRange &CR) | ||||||||
2422 | : BitWidth(CR.getBitWidth()), Assumed(CR), | ||||||||
2423 | Known(getWorstState(CR.getBitWidth())) {} | ||||||||
2424 | |||||||||
2425 | /// Return the worst possible representable state. | ||||||||
2426 | static ConstantRange getWorstState(uint32_t BitWidth) { | ||||||||
2427 | return ConstantRange::getFull(BitWidth); | ||||||||
2428 | } | ||||||||
2429 | |||||||||
2430 | /// Return the best possible representable state. | ||||||||
2431 | static ConstantRange getBestState(uint32_t BitWidth) { | ||||||||
2432 | return ConstantRange::getEmpty(BitWidth); | ||||||||
2433 | } | ||||||||
2434 | static ConstantRange getBestState(const IntegerRangeState &IRS) { | ||||||||
2435 | return getBestState(IRS.getBitWidth()); | ||||||||
2436 | } | ||||||||
2437 | |||||||||
2438 | /// Return associated values' bit width. | ||||||||
2439 | uint32_t getBitWidth() const { return BitWidth; } | ||||||||
2440 | |||||||||
2441 | /// See AbstractState::isValidState() | ||||||||
2442 | bool isValidState() const override { | ||||||||
2443 | return BitWidth > 0 && !Assumed.isFullSet(); | ||||||||
2444 | } | ||||||||
2445 | |||||||||
2446 | /// See AbstractState::isAtFixpoint() | ||||||||
2447 | bool isAtFixpoint() const override { return Assumed == Known; } | ||||||||
2448 | |||||||||
2449 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
2450 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
2451 | Known = Assumed; | ||||||||
2452 | return ChangeStatus::CHANGED; | ||||||||
2453 | } | ||||||||
2454 | |||||||||
2455 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
2456 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
2457 | Assumed = Known; | ||||||||
2458 | return ChangeStatus::CHANGED; | ||||||||
2459 | } | ||||||||
2460 | |||||||||
2461 | /// Return the known state encoding | ||||||||
2462 | ConstantRange getKnown() const { return Known; } | ||||||||
2463 | |||||||||
2464 | /// Return the assumed state encoding. | ||||||||
2465 | ConstantRange getAssumed() const { return Assumed; } | ||||||||
2466 | |||||||||
2467 | /// Unite assumed range with the passed state. | ||||||||
2468 | void unionAssumed(const ConstantRange &R) { | ||||||||
2469 | // Don't loose a known range. | ||||||||
2470 | Assumed = Assumed.unionWith(R).intersectWith(Known); | ||||||||
2471 | } | ||||||||
2472 | |||||||||
2473 | /// See IntegerRangeState::unionAssumed(..). | ||||||||
2474 | void unionAssumed(const IntegerRangeState &R) { | ||||||||
2475 | unionAssumed(R.getAssumed()); | ||||||||
2476 | } | ||||||||
2477 | |||||||||
2478 | /// Unite known range with the passed state. | ||||||||
2479 | void unionKnown(const ConstantRange &R) { | ||||||||
2480 | // Don't loose a known range. | ||||||||
2481 | Known = Known.unionWith(R); | ||||||||
2482 | Assumed = Assumed.unionWith(Known); | ||||||||
2483 | } | ||||||||
2484 | |||||||||
2485 | /// See IntegerRangeState::unionKnown(..). | ||||||||
2486 | void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); } | ||||||||
2487 | |||||||||
2488 | /// Intersect known range with the passed state. | ||||||||
2489 | void intersectKnown(const ConstantRange &R) { | ||||||||
2490 | Assumed = Assumed.intersectWith(R); | ||||||||
2491 | Known = Known.intersectWith(R); | ||||||||
2492 | } | ||||||||
2493 | |||||||||
2494 | /// See IntegerRangeState::intersectKnown(..). | ||||||||
2495 | void intersectKnown(const IntegerRangeState &R) { | ||||||||
2496 | intersectKnown(R.getKnown()); | ||||||||
2497 | } | ||||||||
2498 | |||||||||
2499 | /// Equality for IntegerRangeState. | ||||||||
2500 | bool operator==(const IntegerRangeState &R) const { | ||||||||
2501 | return getAssumed() == R.getAssumed() && getKnown() == R.getKnown(); | ||||||||
2502 | } | ||||||||
2503 | |||||||||
2504 | /// "Clamp" this state with \p R. The result is subtype dependent but it is | ||||||||
2505 | /// intended that only information assumed in both states will be assumed in | ||||||||
2506 | /// this one afterwards. | ||||||||
2507 | IntegerRangeState operator^=(const IntegerRangeState &R) { | ||||||||
2508 | // NOTE: `^=` operator seems like `intersect` but in this case, we need to | ||||||||
2509 | // take `union`. | ||||||||
2510 | unionAssumed(R); | ||||||||
2511 | return *this; | ||||||||
2512 | } | ||||||||
2513 | |||||||||
2514 | IntegerRangeState operator&=(const IntegerRangeState &R) { | ||||||||
2515 | // NOTE: `&=` operator seems like `intersect` but in this case, we need to | ||||||||
2516 | // take `union`. | ||||||||
2517 | unionKnown(R); | ||||||||
2518 | unionAssumed(R); | ||||||||
2519 | return *this; | ||||||||
2520 | } | ||||||||
2521 | }; | ||||||||
2522 | |||||||||
2523 | /// Simple state for a set. | ||||||||
2524 | /// | ||||||||
2525 | /// This represents a state containing a set of values. The interface supports | ||||||||
2526 | /// modelling sets that contain all possible elements. The state's internal | ||||||||
2527 | /// value is modified using union or intersection operations. | ||||||||
2528 | template <typename BaseTy> struct SetState : public AbstractState { | ||||||||
2529 | /// A wrapper around a set that has semantics for handling unions and | ||||||||
2530 | /// intersections with a "universal" set that contains all elements. | ||||||||
2531 | struct SetContents { | ||||||||
2532 | /// Creates a universal set with no concrete elements or an empty set. | ||||||||
2533 | SetContents(bool Universal) : Universal(Universal) {} | ||||||||
2534 | |||||||||
2535 | /// Creates a non-universal set with concrete values. | ||||||||
2536 | SetContents(const DenseSet<BaseTy> &Assumptions) | ||||||||
2537 | : Universal(false), Set(Assumptions) {} | ||||||||
2538 | |||||||||
2539 | SetContents(bool Universal, const DenseSet<BaseTy> &Assumptions) | ||||||||
2540 | : Universal(Universal), Set(Assumptions) {} | ||||||||
2541 | |||||||||
2542 | const DenseSet<BaseTy> &getSet() const { return Set; } | ||||||||
2543 | |||||||||
2544 | bool isUniversal() const { return Universal; } | ||||||||
2545 | |||||||||
2546 | bool empty() const { return Set.empty() && !Universal; } | ||||||||
2547 | |||||||||
2548 | /// Finds A := A ^ B where A or B could be the "Universal" set which | ||||||||
2549 | /// contains every possible attribute. Returns true if changes were made. | ||||||||
2550 | bool getIntersection(const SetContents &RHS) { | ||||||||
2551 | bool IsUniversal = Universal; | ||||||||
2552 | unsigned Size = Set.size(); | ||||||||
2553 | |||||||||
2554 | // A := A ^ U = A | ||||||||
2555 | if (RHS.isUniversal()) | ||||||||
2556 | return false; | ||||||||
2557 | |||||||||
2558 | // A := U ^ B = B | ||||||||
2559 | if (Universal) | ||||||||
2560 | Set = RHS.getSet(); | ||||||||
2561 | else | ||||||||
2562 | set_intersect(Set, RHS.getSet()); | ||||||||
2563 | |||||||||
2564 | Universal &= RHS.isUniversal(); | ||||||||
2565 | return IsUniversal != Universal || Size != Set.size(); | ||||||||
2566 | } | ||||||||
2567 | |||||||||
2568 | /// Finds A := A u B where A or B could be the "Universal" set which | ||||||||
2569 | /// contains every possible attribute. returns true if changes were made. | ||||||||
2570 | bool getUnion(const SetContents &RHS) { | ||||||||
2571 | bool IsUniversal = Universal; | ||||||||
2572 | unsigned Size = Set.size(); | ||||||||
2573 | |||||||||
2574 | // A := A u U = U = U u B | ||||||||
2575 | if (!RHS.isUniversal() && !Universal) | ||||||||
2576 | set_union(Set, RHS.getSet()); | ||||||||
2577 | |||||||||
2578 | Universal |= RHS.isUniversal(); | ||||||||
2579 | return IsUniversal != Universal || Size != Set.size(); | ||||||||
2580 | } | ||||||||
2581 | |||||||||
2582 | private: | ||||||||
2583 | /// Indicates if this set is "universal", containing every possible element. | ||||||||
2584 | bool Universal; | ||||||||
2585 | |||||||||
2586 | /// The set of currently active assumptions. | ||||||||
2587 | DenseSet<BaseTy> Set; | ||||||||
2588 | }; | ||||||||
2589 | |||||||||
2590 | SetState() : Known(false), Assumed(true), IsAtFixedpoint(false) {} | ||||||||
2591 | |||||||||
2592 | /// Initializes the known state with an initial set and initializes the | ||||||||
2593 | /// assumed state as universal. | ||||||||
2594 | SetState(const DenseSet<BaseTy> &Known) | ||||||||
2595 | : Known(Known), Assumed(true), IsAtFixedpoint(false) {} | ||||||||
2596 | |||||||||
2597 | /// See AbstractState::isValidState() | ||||||||
2598 | bool isValidState() const override { return !Assumed.empty(); } | ||||||||
2599 | |||||||||
2600 | /// See AbstractState::isAtFixpoint() | ||||||||
2601 | bool isAtFixpoint() const override { return IsAtFixedpoint; } | ||||||||
2602 | |||||||||
2603 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
2604 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
2605 | IsAtFixedpoint = true; | ||||||||
2606 | Known = Assumed; | ||||||||
2607 | return ChangeStatus::UNCHANGED; | ||||||||
2608 | } | ||||||||
2609 | |||||||||
2610 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
2611 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
2612 | IsAtFixedpoint = true; | ||||||||
2613 | Assumed = Known; | ||||||||
2614 | return ChangeStatus::CHANGED; | ||||||||
2615 | } | ||||||||
2616 | |||||||||
2617 | /// Return the known state encoding. | ||||||||
2618 | const SetContents &getKnown() const { return Known; } | ||||||||
2619 | |||||||||
2620 | /// Return the assumed state encoding. | ||||||||
2621 | const SetContents &getAssumed() const { return Assumed; } | ||||||||
2622 | |||||||||
2623 | /// Returns if the set state contains the element. | ||||||||
2624 | bool setContains(const BaseTy &Elem) const { | ||||||||
2625 | return Assumed.getSet().contains(Elem) || Known.getSet().contains(Elem); | ||||||||
2626 | } | ||||||||
2627 | |||||||||
2628 | /// Performs the set intersection between this set and \p RHS. Returns true if | ||||||||
2629 | /// changes were made. | ||||||||
2630 | bool getIntersection(const SetContents &RHS) { | ||||||||
2631 | unsigned SizeBefore = Assumed.getSet().size(); | ||||||||
2632 | |||||||||
2633 | // Get intersection and make sure that the known set is still a proper | ||||||||
2634 | // subset of the assumed set. A := K u (A ^ R). | ||||||||
2635 | Assumed.getIntersection(RHS); | ||||||||
2636 | Assumed.getUnion(Known); | ||||||||
2637 | |||||||||
2638 | return SizeBefore != Assumed.getSet().size(); | ||||||||
2639 | } | ||||||||
2640 | |||||||||
2641 | /// Performs the set union between this set and \p RHS. Returns true if | ||||||||
2642 | /// changes were made. | ||||||||
2643 | bool getUnion(const SetContents &RHS) { return Assumed.getUnion(RHS); } | ||||||||
2644 | |||||||||
2645 | private: | ||||||||
2646 | /// The set of values known for this state. | ||||||||
2647 | SetContents Known; | ||||||||
2648 | |||||||||
2649 | /// The set of assumed values for this state. | ||||||||
2650 | SetContents Assumed; | ||||||||
2651 | |||||||||
2652 | bool IsAtFixedpoint; | ||||||||
2653 | }; | ||||||||
2654 | |||||||||
/// Helper struct necessary as the modular build fails if the virtual method
/// IRAttribute::manifest is defined in the Attributor.cpp.
struct IRAttributeManifest {
  /// Attach \p DeducedAttrs to position \p IRP on behalf of \p A.
  /// NOTE(review): \p ForceReplace presumably replaces existing attributes of
  /// the same kind rather than keeping them — declaration only here, confirm
  /// against the definition in Attributor.cpp.
  static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
                                    const ArrayRef<Attribute> &DeducedAttrs,
                                    bool ForceReplace = false);
};
2662 | |||||||||
/// Helper to tie an abstract state implementation to an abstract attribute.
template <typename StateTy, typename BaseType, class... Ts>
struct StateWrapper : public BaseType, public StateTy {
  /// Provide static access to the type of the state.
  using StateType = StateTy;

  /// Construct the attribute part from \p IRP and forward \p Args to the
  /// state constructor.
  StateWrapper(const IRPosition &IRP, Ts... Args)
      : BaseType(IRP), StateTy(Args...) {}

  /// See AbstractAttribute::getState(...).
  StateType &getState() override { return *this; }

  /// See AbstractAttribute::getState(...).
  const StateType &getState() const override { return *this; }
};
2678 | |||||||||
2679 | /// Helper class that provides common functionality to manifest IR attributes. | ||||||||
2680 | template <Attribute::AttrKind AK, typename BaseType> | ||||||||
2681 | struct IRAttribute : public BaseType { | ||||||||
2682 | IRAttribute(const IRPosition &IRP) : BaseType(IRP) {} | ||||||||
2683 | |||||||||
2684 | /// See AbstractAttribute::initialize(...). | ||||||||
2685 | virtual void initialize(Attributor &A) override { | ||||||||
2686 | const IRPosition &IRP = this->getIRPosition(); | ||||||||
2687 | if (isa<UndefValue>(IRP.getAssociatedValue()) || | ||||||||
2688 | this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false, | ||||||||
2689 | &A)) { | ||||||||
2690 | this->getState().indicateOptimisticFixpoint(); | ||||||||
2691 | return; | ||||||||
2692 | } | ||||||||
2693 | |||||||||
2694 | bool IsFnInterface = IRP.isFnInterfaceKind(); | ||||||||
2695 | const Function *FnScope = IRP.getAnchorScope(); | ||||||||
2696 | // TODO: Not all attributes require an exact definition. Find a way to | ||||||||
2697 | // enable deduction for some but not all attributes in case the | ||||||||
2698 | // definition might be changed at runtime, see also | ||||||||
2699 | // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. | ||||||||
2700 | // TODO: We could always determine abstract attributes and if sufficient | ||||||||
2701 | // information was found we could duplicate the functions that do not | ||||||||
2702 | // have an exact definition. | ||||||||
2703 | if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) | ||||||||
2704 | this->getState().indicatePessimisticFixpoint(); | ||||||||
2705 | } | ||||||||
2706 | |||||||||
2707 | /// See AbstractAttribute::manifest(...). | ||||||||
2708 | ChangeStatus manifest(Attributor &A) override { | ||||||||
2709 | if (isa<UndefValue>(this->getIRPosition().getAssociatedValue())) | ||||||||
2710 | return ChangeStatus::UNCHANGED; | ||||||||
2711 | SmallVector<Attribute, 4> DeducedAttrs; | ||||||||
2712 | getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs); | ||||||||
2713 | return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(), | ||||||||
2714 | DeducedAttrs); | ||||||||
2715 | } | ||||||||
2716 | |||||||||
2717 | /// Return the kind that identifies the abstract attribute implementation. | ||||||||
2718 | Attribute::AttrKind getAttrKind() const { return AK; } | ||||||||
2719 | |||||||||
2720 | /// Return the deduced attributes in \p Attrs. | ||||||||
2721 | virtual void getDeducedAttributes(LLVMContext &Ctx, | ||||||||
2722 | SmallVectorImpl<Attribute> &Attrs) const { | ||||||||
2723 | Attrs.emplace_back(Attribute::get(Ctx, getAttrKind())); | ||||||||
2724 | } | ||||||||
2725 | }; | ||||||||
2726 | |||||||||
2727 | /// Base struct for all "concrete attribute" deductions. | ||||||||
2728 | /// | ||||||||
2729 | /// The abstract attribute is a minimal interface that allows the Attributor to | ||||||||
2730 | /// orchestrate the abstract/fixpoint analysis. The design allows to hide away | ||||||||
2731 | /// implementation choices made for the subclasses but also to structure their | ||||||||
2732 | /// implementation and simplify the use of other abstract attributes in-flight. | ||||||||
2733 | /// | ||||||||
2734 | /// To allow easy creation of new attributes, most methods have default | ||||||||
2735 | /// implementations. The ones that do not are generally straight forward, except | ||||||||
2736 | /// `AbstractAttribute::updateImpl` which is the location of most reasoning | ||||||||
2737 | /// associated with the abstract attribute. The update is invoked by the | ||||||||
2738 | /// Attributor in case the situation used to justify the current optimistic | ||||||||
2739 | /// state might have changed. The Attributor determines this automatically | ||||||||
2740 | /// by monitoring the `Attributor::getAAFor` calls made by abstract attributes. | ||||||||
2741 | /// | ||||||||
2742 | /// The `updateImpl` method should inspect the IR and other abstract attributes | ||||||||
2743 | /// in-flight to justify the best possible (=optimistic) state. The actual | ||||||||
2744 | /// implementation is, similar to the underlying abstract state encoding, not | ||||||||
2745 | /// exposed. In the most common case, the `updateImpl` will go through a list of | ||||||||
2746 | /// reasons why its optimistic state is valid given the current information. If | ||||||||
2747 | /// any combination of them holds and is sufficient to justify the current | ||||||||
2748 | /// optimistic state, the method shall return UNCHAGED. If not, the optimistic | ||||||||
2749 | /// state is adjusted to the situation and the method shall return CHANGED. | ||||||||
2750 | /// | ||||||||
2751 | /// If the manifestation of the "concrete attribute" deduced by the subclass | ||||||||
2752 | /// differs from the "default" behavior, which is a (set of) LLVM-IR | ||||||||
2753 | /// attribute(s) for an argument, call site argument, function return value, or | ||||||||
2754 | /// function, the `AbstractAttribute::manifest` method should be overloaded. | ||||||||
2755 | /// | ||||||||
2756 | /// NOTE: If the state obtained via getState() is INVALID, thus if | ||||||||
2757 | /// AbstractAttribute::getState().isValidState() returns false, no | ||||||||
2758 | /// information provided by the methods of this class should be used. | ||||||||
2759 | /// NOTE: The Attributor currently has certain limitations to what we can do. | ||||||||
2760 | /// As a general rule of thumb, "concrete" abstract attributes should *for | ||||||||
2761 | /// now* only perform "backward" information propagation. That means | ||||||||
2762 | /// optimistic information obtained through abstract attributes should | ||||||||
2763 | /// only be used at positions that precede the origin of the information | ||||||||
2764 | /// with regards to the program flow. More practically, information can | ||||||||
2765 | /// *now* be propagated from instructions to their enclosing function, but | ||||||||
2766 | /// *not* from call sites to the called function. The mechanisms to allow | ||||||||
2767 | /// both directions will be added in the future. | ||||||||
2768 | /// NOTE: The mechanics of adding a new "concrete" abstract attribute are | ||||||||
2769 | /// described in the file comment. | ||||||||
2770 | struct AbstractAttribute : public IRPosition, public AADepGraphNode { | ||||||||
2771 | using StateType = AbstractState; | ||||||||
2772 | |||||||||
2773 | AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {} | ||||||||
2774 | |||||||||
2775 | /// Virtual destructor. | ||||||||
2776 | virtual ~AbstractAttribute() {} | ||||||||
2777 | |||||||||
2778 | /// This function is used to identify if an \p DGN is of type | ||||||||
2779 | /// AbstractAttribute so that the dyn_cast and cast can use such information | ||||||||
2780 | /// to cast an AADepGraphNode to an AbstractAttribute. | ||||||||
2781 | /// | ||||||||
2782 | /// We eagerly return true here because all AADepGraphNodes except for the | ||||||||
2783 | /// Synthethis Node are of type AbstractAttribute | ||||||||
2784 | static bool classof(const AADepGraphNode *DGN) { return true; } | ||||||||
2785 | |||||||||
2786 | /// Initialize the state with the information in the Attributor \p A. | ||||||||
2787 | /// | ||||||||
2788 | /// This function is called by the Attributor once all abstract attributes | ||||||||
2789 | /// have been identified. It can and shall be used for task like: | ||||||||
2790 | /// - identify existing knowledge in the IR and use it for the "known state" | ||||||||
2791 | /// - perform any work that is not going to change over time, e.g., determine | ||||||||
2792 | /// a subset of the IR, or attributes in-flight, that have to be looked at | ||||||||
2793 | /// in the `updateImpl` method. | ||||||||
2794 | virtual void initialize(Attributor &A) {} | ||||||||
2795 | |||||||||
2796 | /// Return the internal abstract state for inspection. | ||||||||
2797 | virtual StateType &getState() = 0; | ||||||||
2798 | virtual const StateType &getState() const = 0; | ||||||||
2799 | |||||||||
2800 | /// Return an IR position, see struct IRPosition. | ||||||||
2801 | const IRPosition &getIRPosition() const { return *this; }; | ||||||||
2802 | IRPosition &getIRPosition() { return *this; }; | ||||||||
2803 | |||||||||
2804 | /// Helper functions, for debug purposes only. | ||||||||
2805 | ///{ | ||||||||
2806 | void print(raw_ostream &OS) const override; | ||||||||
2807 | virtual void printWithDeps(raw_ostream &OS) const; | ||||||||
2808 | void dump() const { print(dbgs()); } | ||||||||
2809 | |||||||||
2810 | /// This function should return the "summarized" assumed state as string. | ||||||||
2811 | virtual const std::string getAsStr() const = 0; | ||||||||
2812 | |||||||||
2813 | /// This function should return the name of the AbstractAttribute | ||||||||
2814 | virtual const std::string getName() const = 0; | ||||||||
2815 | |||||||||
2816 | /// This function should return the address of the ID of the AbstractAttribute | ||||||||
2817 | virtual const char *getIdAddr() const = 0; | ||||||||
2818 | ///} | ||||||||
2819 | |||||||||
2820 | /// Allow the Attributor access to the protected methods. | ||||||||
2821 | friend struct Attributor; | ||||||||
2822 | |||||||||
2823 | protected: | ||||||||
2824 | /// Hook for the Attributor to trigger an update of the internal state. | ||||||||
2825 | /// | ||||||||
2826 | /// If this attribute is already fixed, this method will return UNCHANGED, | ||||||||
2827 | /// otherwise it delegates to `AbstractAttribute::updateImpl`. | ||||||||
2828 | /// | ||||||||
2829 | /// \Return CHANGED if the internal state changed, otherwise UNCHANGED. | ||||||||
2830 | ChangeStatus update(Attributor &A); | ||||||||
2831 | |||||||||
2832 | /// Hook for the Attributor to trigger the manifestation of the information | ||||||||
2833 | /// represented by the abstract attribute in the LLVM-IR. | ||||||||
2834 | /// | ||||||||
2835 | /// \Return CHANGED if the IR was altered, otherwise UNCHANGED. | ||||||||
2836 | virtual ChangeStatus manifest(Attributor &A) { | ||||||||
2837 | return ChangeStatus::UNCHANGED; | ||||||||
2838 | } | ||||||||
2839 | |||||||||
2840 | /// Hook to enable custom statistic tracking, called after manifest that | ||||||||
2841 | /// resulted in a change if statistics are enabled. | ||||||||
2842 | /// | ||||||||
2843 | /// We require subclasses to provide an implementation so we remember to | ||||||||
2844 | /// add statistics for them. | ||||||||
2845 | virtual void trackStatistics() const = 0; | ||||||||
2846 | |||||||||
2847 | /// The actual update/transfer function which has to be implemented by the | ||||||||
2848 | /// derived classes. | ||||||||
2849 | /// | ||||||||
2850 | /// If it is called, the environment has changed and we have to determine if | ||||||||
2851 | /// the current information is still valid or adjust it otherwise. | ||||||||
2852 | /// | ||||||||
2853 | /// \Return CHANGED if the internal state changed, otherwise UNCHANGED. | ||||||||
2854 | virtual ChangeStatus updateImpl(Attributor &A) = 0; | ||||||||
2855 | }; | ||||||||
2856 | |||||||||
/// Forward declarations of output streams for debug purposes.
///
///{
raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind);
raw_ostream &operator<<(raw_ostream &OS, const IRPosition &);
raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
/// Print an integer-based state as "(known-assumed)" followed by the generic
/// AbstractState printout.
template <typename base_ty, base_ty BestState, base_ty WorstState>
raw_ostream &
operator<<(raw_ostream &OS,
           const IntegerStateBase<base_ty, BestState, WorstState> &S) {
  return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
            << static_cast<const AbstractState &>(S);
}
raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State);
///}
2874 | |||||||||
/// Run the Attributor pass over a whole module (new pass manager interface).
struct AttributorPass : public PassInfoMixin<AttributorPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// Run the Attributor pass over an SCC of the lazy call graph (new pass
/// manager interface).
struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
};

/// Legacy pass manager factory functions for the two passes above.
Pass *createAttributorLegacyPass();
Pass *createAttributorCGSCCLegacyPass();
2885 | |||||||||
2886 | /// Helper function to clamp a state \p S of type \p StateType with the | ||||||||
2887 | /// information in \p R and indicate/return if \p S did change (as-in update is | ||||||||
2888 | /// required to be run again). | ||||||||
2889 | template <typename StateType> | ||||||||
2890 | ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) { | ||||||||
2891 | auto Assumed = S.getAssumed(); | ||||||||
2892 | S ^= R; | ||||||||
2893 | return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED | ||||||||
2894 | : ChangeStatus::CHANGED; | ||||||||
2895 | } | ||||||||
2896 | |||||||||
2897 | /// ---------------------------------------------------------------------------- | ||||||||
2898 | /// Abstract Attribute Classes | ||||||||
2899 | /// ---------------------------------------------------------------------------- | ||||||||
2900 | |||||||||
/// An abstract attribute for the returned values of a function.
struct AAReturnedValues
    : public IRAttribute<Attribute::Returned, AbstractAttribute> {
  AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return an assumed unique return value if a single candidate is found. If
  /// there cannot be one, return a nullptr. If it is not clear yet, return the
  /// Optional::NoneType.
  Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;

  /// Check \p Pred on all returned values.
  ///
  /// This method will evaluate \p Pred on returned values and return
  /// true if (1) all returned values are known, and (2) \p Pred returned true
  /// for all returned values.
  ///
  /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts
  /// method, this one will not filter dead return instructions.
  virtual bool checkForAllReturnedValuesAndReturnInsts(
      function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
      const = 0;

  /// Iterators over the internal mapping from each potentially returned value
  /// to the return instructions that may return it.
  using iterator =
      MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator;
  using const_iterator =
      MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator;
  virtual llvm::iterator_range<iterator> returned_values() = 0;
  virtual llvm::iterator_range<const_iterator> returned_values() const = 0;

  /// Return the number of potentially returned values tracked.
  virtual size_t getNumReturnValues() const = 0;

  /// Create an abstract attribute view for the position \p IRP.
  static AAReturnedValues &createForPosition(const IRPosition &IRP,
                                             Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAReturnedValues"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAReturnedValues
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
2951 | |||||||||
/// An abstract interface for all nounwind attributes.
struct AANoUnwind
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Returns true if nounwind is assumed (optimistic information).
  bool isAssumedNoUnwind() const { return getAssumed(); }

  /// Returns true if nounwind is known (proven information).
  bool isKnownNoUnwind() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoUnwind"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoUnwind
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
2980 | |||||||||
/// An abstract interface for all nosync attributes.
struct AANoSync
    : public IRAttribute<Attribute::NoSync,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Returns true if "nosync" is assumed (optimistic information).
  bool isAssumedNoSync() const { return getAssumed(); }

  /// Returns true if "nosync" is known (proven information).
  bool isKnownNoSync() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoSync"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoSync
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3009 | |||||||||
/// An abstract interface for all nonnull attributes.
struct AANonNull
    : public IRAttribute<Attribute::NonNull,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if we assume that the underlying value is nonnull.
  bool isAssumedNonNull() const { return getAssumed(); }

  /// Return true if we know that the underlying value is nonnull.
  bool isKnownNonNull() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANonNull"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANonNull
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3039 | |||||||||
/// An abstract attribute for norecurse.
struct AANoRecurse
    : public IRAttribute<Attribute::NoRecurse,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if "norecurse" is assumed (optimistic information).
  bool isAssumedNoRecurse() const { return getAssumed(); }

  /// Return true if "norecurse" is known (proven information).
  bool isKnownNoRecurse() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoRecurse"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoRecurse
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3069 | |||||||||
/// An abstract attribute for willreturn.
struct AAWillReturn
    : public IRAttribute<Attribute::WillReturn,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if "willreturn" is assumed (optimistic information).
  bool isAssumedWillReturn() const { return getAssumed(); }

  /// Return true if "willreturn" is known (proven information).
  bool isKnownWillReturn() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAWillReturn"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AAWillReturn
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3099 | |||||||||
/// An abstract attribute for undefined behavior.
struct AAUndefinedBehavior
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Return true if "undefined behavior" is assumed.
  bool isAssumedToCauseUB() const { return getAssumed(); }

  /// Return true if "undefined behavior" is assumed for a specific instruction.
  virtual bool isAssumedToCauseUB(Instruction *I) const = 0;

  /// Return true if "undefined behavior" is known.
  bool isKnownToCauseUB() const { return getKnown(); }

  /// Return true if "undefined behavior" is known for a specific instruction.
  virtual bool isKnownToCauseUB(Instruction *I) const = 0;

  /// Create an abstract attribute view for the position \p IRP.
  static AAUndefinedBehavior &createForPosition(const IRPosition &IRP,
                                                Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAUndefinedBehavior"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUndefinedBehavior
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3137 | |||||||||
3138 | /// An abstract interface to determine reachability of point A to B. | ||||||||
3139 | struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
3140 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
3141 | AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
3142 | |||||||||
3143 | /// Returns true if 'From' instruction is assumed to reach, 'To' instruction. | ||||||||
3144 | /// Users should provide two positions they are interested in, and the class | ||||||||
3145 | /// determines (and caches) reachability. | ||||||||
3146 | bool isAssumedReachable(Attributor &A, const Instruction &From, | ||||||||
3147 | const Instruction &To) const { | ||||||||
3148 | if (!getState().isValidState()) | ||||||||
3149 | return true; | ||||||||
3150 | return A.getInfoCache().getPotentiallyReachable(From, To); | ||||||||
3151 | } | ||||||||
3152 | |||||||||
3153 | /// Returns true if 'From' instruction is known to reach, 'To' instruction. | ||||||||
3154 | /// Users should provide two positions they are interested in, and the class | ||||||||
3155 | /// determines (and caches) reachability. | ||||||||
3156 | bool isKnownReachable(Attributor &A, const Instruction &From, | ||||||||
3157 | const Instruction &To) const { | ||||||||
3158 | if (!getState().isValidState()) | ||||||||
3159 | return false; | ||||||||
3160 | return A.getInfoCache().getPotentiallyReachable(From, To); | ||||||||
3161 | } | ||||||||
3162 | |||||||||
3163 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
3164 | static AAReachability &createForPosition(const IRPosition &IRP, | ||||||||
3165 | Attributor &A); | ||||||||
3166 | |||||||||
3167 | /// See AbstractAttribute::getName() | ||||||||
3168 | const std::string getName() const override { return "AAReachability"; } | ||||||||
3169 | |||||||||
3170 | /// See AbstractAttribute::getIdAddr() | ||||||||
3171 | const char *getIdAddr() const override { return &ID; } | ||||||||
3172 | |||||||||
3173 | /// This function should return true if the type of the \p AA is | ||||||||
3174 | /// AAReachability | ||||||||
3175 | static bool classof(const AbstractAttribute *AA) { | ||||||||
3176 | return (AA->getIdAddr() == &ID); | ||||||||
3177 | } | ||||||||
3178 | |||||||||
3179 | /// Unique ID (due to the unique address) | ||||||||
3180 | static const char ID; | ||||||||
3181 | }; | ||||||||
3182 | |||||||||
/// An abstract interface for all noalias attributes.
struct AANoAlias
    : public IRAttribute<Attribute::NoAlias,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if we assume that the underlying value is noalias.
  bool isAssumedNoAlias() const { return getAssumed(); }

  /// Return true if we know that the underlying value is noalias.
  bool isKnownNoAlias() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoAlias"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoAlias
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3212 | |||||||||
/// An AbstractAttribute for nofree.
struct AANoFree
    : public IRAttribute<Attribute::NoFree,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if "nofree" is assumed (optimistic information).
  bool isAssumedNoFree() const { return getAssumed(); }

  /// Return true if "nofree" is known (proven information).
  bool isKnownNoFree() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoFree"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoFree
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3242 | |||||||||
/// An AbstractAttribute for noreturn.
struct AANoReturn
    : public IRAttribute<Attribute::NoReturn,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if the underlying object is assumed to never return.
  bool isAssumedNoReturn() const { return getAssumed(); }

  /// Return true if the underlying object is known to never return.
  bool isKnownNoReturn() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoReturn"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoReturn
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3272 | |||||||||
3273 | /// An abstract interface for liveness abstract attribute. | ||||||||
3274 | struct AAIsDead | ||||||||
3275 | : public StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute> { | ||||||||
3276 | using Base = StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute>; | ||||||||
3277 | AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
3278 | |||||||||
3279 | /// State encoding bits. A set bit in the state means the property holds. | ||||||||
3280 | enum { | ||||||||
3281 | HAS_NO_EFFECT = 1 << 0, | ||||||||
3282 | IS_REMOVABLE = 1 << 1, | ||||||||
3283 | |||||||||
3284 | IS_DEAD = HAS_NO_EFFECT | IS_REMOVABLE, | ||||||||
3285 | }; | ||||||||
3286 | static_assert(IS_DEAD == getBestState(), "Unexpected BEST_STATE value"); | ||||||||
3287 | |||||||||
3288 | protected: | ||||||||
3289 | /// The query functions are protected such that other attributes need to go | ||||||||
3290 | /// through the Attributor interfaces: `Attributor::isAssumedDead(...)` | ||||||||
3291 | |||||||||
3292 | /// Returns true if the underlying value is assumed dead. | ||||||||
3293 | virtual bool isAssumedDead() const = 0; | ||||||||
3294 | |||||||||
3295 | /// Returns true if the underlying value is known dead. | ||||||||
3296 | virtual bool isKnownDead() const = 0; | ||||||||
3297 | |||||||||
3298 | /// Returns true if \p BB is assumed dead. | ||||||||
3299 | virtual bool isAssumedDead(const BasicBlock *BB) const = 0; | ||||||||
3300 | |||||||||
3301 | /// Returns true if \p BB is known dead. | ||||||||
3302 | virtual bool isKnownDead(const BasicBlock *BB) const = 0; | ||||||||
3303 | |||||||||
3304 | /// Returns true if \p I is assumed dead. | ||||||||
3305 | virtual bool isAssumedDead(const Instruction *I) const = 0; | ||||||||
3306 | |||||||||
3307 | /// Returns true if \p I is known dead. | ||||||||
3308 | virtual bool isKnownDead(const Instruction *I) const = 0; | ||||||||
3309 | |||||||||
3310 | /// This method is used to check if at least one instruction in a collection | ||||||||
3311 | /// of instructions is live. | ||||||||
3312 | template <typename T> bool isLiveInstSet(T begin, T end) const { | ||||||||
3313 | for (const auto &I : llvm::make_range(begin, end)) { | ||||||||
3314 | assert(I->getFunction() == getIRPosition().getAssociatedFunction() &&(static_cast <bool> (I->getFunction() == getIRPosition ().getAssociatedFunction() && "Instruction must be in the same anchor scope function." ) ? void (0) : __assert_fail ("I->getFunction() == getIRPosition().getAssociatedFunction() && \"Instruction must be in the same anchor scope function.\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 3315, __extension__ __PRETTY_FUNCTION__)) | ||||||||
3315 | "Instruction must be in the same anchor scope function.")(static_cast <bool> (I->getFunction() == getIRPosition ().getAssociatedFunction() && "Instruction must be in the same anchor scope function." ) ? void (0) : __assert_fail ("I->getFunction() == getIRPosition().getAssociatedFunction() && \"Instruction must be in the same anchor scope function.\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 3315, __extension__ __PRETTY_FUNCTION__)); | ||||||||
3316 | |||||||||
3317 | if (!isAssumedDead(I)) | ||||||||
3318 | return true; | ||||||||
3319 | } | ||||||||
3320 | |||||||||
3321 | return false; | ||||||||
3322 | } | ||||||||
3323 | |||||||||
3324 | public: | ||||||||
3325 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
3326 | static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
3327 | |||||||||
3328 | /// Determine if \p F might catch asynchronous exceptions. | ||||||||
3329 | static bool mayCatchAsynchronousExceptions(const Function &F) { | ||||||||
3330 | return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); | ||||||||
3331 | } | ||||||||
3332 | |||||||||
/// Return if the edge from \p From BB to \p To BB is assumed dead.
/// This is specifically useful in AAReachability.
/// NOTE(review): conservative default — returning false means "maybe alive";
/// implementations with real liveness information override this.
virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const {
  return false;
}
3338 | |||||||||
/// See AbstractAttribute::getName()
// NOTE(review): the `const` on the by-value std::string return is redundant.
const std::string getName() const override { return "AAIsDead"; }
3341 | |||||||||
/// See AbstractAttribute::getIdAddr()
/// The address of the static \c ID member uniquely identifies this
/// attribute kind (LLVM-style RTTI).
const char *getIdAddr() const override { return &ID; }
3344 | |||||||||
/// This function should return true if the type of the \p AA is AAIsDead
// LLVM-style RTTI: identity is established by comparing unique ID addresses.
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}
3349 | |||||||||
3350 | /// Unique ID (due to the unique address) | ||||||||
3351 | static const char ID; | ||||||||
3352 | |||||||||
3353 | friend struct Attributor; | ||||||||
3354 | }; | ||||||||
3355 | |||||||||
3356 | /// State for dereferenceable attribute | ||||||||
3357 | struct DerefState : AbstractState { | ||||||||
3358 | |||||||||
3359 | static DerefState getBestState() { return DerefState(); } | ||||||||
3360 | static DerefState getBestState(const DerefState &) { return getBestState(); } | ||||||||
3361 | |||||||||
3362 | /// Return the worst possible representable state. | ||||||||
3363 | static DerefState getWorstState() { | ||||||||
3364 | DerefState DS; | ||||||||
3365 | DS.indicatePessimisticFixpoint(); | ||||||||
3366 | return DS; | ||||||||
3367 | } | ||||||||
3368 | static DerefState getWorstState(const DerefState &) { | ||||||||
3369 | return getWorstState(); | ||||||||
3370 | } | ||||||||
3371 | |||||||||
3372 | /// State representing for dereferenceable bytes. | ||||||||
3373 | IncIntegerState<> DerefBytesState; | ||||||||
3374 | |||||||||
3375 | /// Map representing for accessed memory offsets and sizes. | ||||||||
3376 | /// A key is Offset and a value is size. | ||||||||
3377 | /// If there is a load/store instruction something like, | ||||||||
3378 | /// p[offset] = v; | ||||||||
3379 | /// (offset, sizeof(v)) will be inserted to this map. | ||||||||
3380 | /// std::map is used because we want to iterate keys in ascending order. | ||||||||
3381 | std::map<int64_t, uint64_t> AccessedBytesMap; | ||||||||
3382 | |||||||||
3383 | /// Helper function to calculate dereferenceable bytes from current known | ||||||||
3384 | /// bytes and accessed bytes. | ||||||||
3385 | /// | ||||||||
3386 | /// int f(int *A){ | ||||||||
3387 | /// *A = 0; | ||||||||
3388 | /// *(A+2) = 2; | ||||||||
3389 | /// *(A+1) = 1; | ||||||||
3390 | /// *(A+10) = 10; | ||||||||
3391 | /// } | ||||||||
3392 | /// ``` | ||||||||
3393 | /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`. | ||||||||
3394 | /// AccessedBytesMap is std::map so it is iterated in accending order on | ||||||||
3395 | /// key(Offset). So KnownBytes will be updated like this: | ||||||||
3396 | /// | ||||||||
3397 | /// |Access | KnownBytes | ||||||||
3398 | /// |(0, 4)| 0 -> 4 | ||||||||
3399 | /// |(4, 4)| 4 -> 8 | ||||||||
3400 | /// |(8, 4)| 8 -> 12 | ||||||||
3401 | /// |(40, 4) | 12 (break) | ||||||||
3402 | void computeKnownDerefBytesFromAccessedMap() { | ||||||||
3403 | int64_t KnownBytes = DerefBytesState.getKnown(); | ||||||||
3404 | for (auto &Access : AccessedBytesMap) { | ||||||||
3405 | if (KnownBytes < Access.first) | ||||||||
3406 | break; | ||||||||
3407 | KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second); | ||||||||
3408 | } | ||||||||
3409 | |||||||||
3410 | DerefBytesState.takeKnownMaximum(KnownBytes); | ||||||||
3411 | } | ||||||||
3412 | |||||||||
3413 | /// State representing that whether the value is globaly dereferenceable. | ||||||||
3414 | BooleanState GlobalState; | ||||||||
3415 | |||||||||
3416 | /// See AbstractState::isValidState() | ||||||||
3417 | bool isValidState() const override { return DerefBytesState.isValidState(); } | ||||||||
3418 | |||||||||
3419 | /// See AbstractState::isAtFixpoint() | ||||||||
3420 | bool isAtFixpoint() const override { | ||||||||
3421 | return !isValidState() || | ||||||||
3422 | (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint()); | ||||||||
3423 | } | ||||||||
3424 | |||||||||
3425 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
3426 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
3427 | DerefBytesState.indicateOptimisticFixpoint(); | ||||||||
3428 | GlobalState.indicateOptimisticFixpoint(); | ||||||||
3429 | return ChangeStatus::UNCHANGED; | ||||||||
3430 | } | ||||||||
3431 | |||||||||
3432 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
3433 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
3434 | DerefBytesState.indicatePessimisticFixpoint(); | ||||||||
3435 | GlobalState.indicatePessimisticFixpoint(); | ||||||||
3436 | return ChangeStatus::CHANGED; | ||||||||
3437 | } | ||||||||
3438 | |||||||||
3439 | /// Update known dereferenceable bytes. | ||||||||
3440 | void takeKnownDerefBytesMaximum(uint64_t Bytes) { | ||||||||
3441 | DerefBytesState.takeKnownMaximum(Bytes); | ||||||||
3442 | |||||||||
3443 | // Known bytes might increase. | ||||||||
3444 | computeKnownDerefBytesFromAccessedMap(); | ||||||||
3445 | } | ||||||||
3446 | |||||||||
3447 | /// Update assumed dereferenceable bytes. | ||||||||
3448 | void takeAssumedDerefBytesMinimum(uint64_t Bytes) { | ||||||||
3449 | DerefBytesState.takeAssumedMinimum(Bytes); | ||||||||
3450 | } | ||||||||
3451 | |||||||||
3452 | /// Add accessed bytes to the map. | ||||||||
3453 | void addAccessedBytes(int64_t Offset, uint64_t Size) { | ||||||||
3454 | uint64_t &AccessedBytes = AccessedBytesMap[Offset]; | ||||||||
3455 | AccessedBytes = std::max(AccessedBytes, Size); | ||||||||
3456 | |||||||||
3457 | // Known bytes might increase. | ||||||||
3458 | computeKnownDerefBytesFromAccessedMap(); | ||||||||
3459 | } | ||||||||
3460 | |||||||||
3461 | /// Equality for DerefState. | ||||||||
3462 | bool operator==(const DerefState &R) const { | ||||||||
3463 | return this->DerefBytesState == R.DerefBytesState && | ||||||||
3464 | this->GlobalState == R.GlobalState; | ||||||||
3465 | } | ||||||||
3466 | |||||||||
3467 | /// Inequality for DerefState. | ||||||||
3468 | bool operator!=(const DerefState &R) const { return !(*this == R); } | ||||||||
3469 | |||||||||
3470 | /// See IntegerStateBase::operator^= | ||||||||
3471 | DerefState operator^=(const DerefState &R) { | ||||||||
3472 | DerefBytesState ^= R.DerefBytesState; | ||||||||
3473 | GlobalState ^= R.GlobalState; | ||||||||
3474 | return *this; | ||||||||
3475 | } | ||||||||
3476 | |||||||||
3477 | /// See IntegerStateBase::operator+= | ||||||||
3478 | DerefState operator+=(const DerefState &R) { | ||||||||
3479 | DerefBytesState += R.DerefBytesState; | ||||||||
3480 | GlobalState += R.GlobalState; | ||||||||
3481 | return *this; | ||||||||
3482 | } | ||||||||
3483 | |||||||||
3484 | /// See IntegerStateBase::operator&= | ||||||||
3485 | DerefState operator&=(const DerefState &R) { | ||||||||
3486 | DerefBytesState &= R.DerefBytesState; | ||||||||
3487 | GlobalState &= R.GlobalState; | ||||||||
3488 | return *this; | ||||||||
3489 | } | ||||||||
3490 | |||||||||
3491 | /// See IntegerStateBase::operator|= | ||||||||
3492 | DerefState operator|=(const DerefState &R) { | ||||||||
3493 | DerefBytesState |= R.DerefBytesState; | ||||||||
3494 | GlobalState |= R.GlobalState; | ||||||||
3495 | return *this; | ||||||||
3496 | } | ||||||||
3497 | |||||||||
3498 | protected: | ||||||||
3499 | const AANonNull *NonNullAA = nullptr; | ||||||||
3500 | }; | ||||||||
3501 | |||||||||
/// An abstract interface for all dereferenceable attributes.
struct AADereferenceable
    : public IRAttribute<Attribute::Dereferenceable,
                         StateWrapper<DerefState, AbstractAttribute>> {
  AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if we assume that the underlying value is nonnull.
  /// Relies on NonNullAA, the (possibly null) AANonNull pointer inherited
  /// from DerefState; a null NonNullAA conservatively yields false.
  bool isAssumedNonNull() const {
    return NonNullAA && NonNullAA->isAssumedNonNull();
  }

  /// Return true if we know that the underlying value is nonnull.
  bool isKnownNonNull() const {
    return NonNullAA && NonNullAA->isKnownNonNull();
  }

  /// Return true if we assume that the underlying value is
  /// dereferenceable(_or_null) globally.
  bool isAssumedGlobal() const { return GlobalState.getAssumed(); }

  /// Return true if we know that the underlying value is
  /// dereferenceable(_or_null) globally.
  bool isKnownGlobal() const { return GlobalState.getKnown(); }

  /// Return assumed dereferenceable bytes.
  uint32_t getAssumedDereferenceableBytes() const {
    return DerefBytesState.getAssumed();
  }

  /// Return known dereferenceable bytes.
  uint32_t getKnownDereferenceableBytes() const {
    return DerefBytesState.getKnown();
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AADereferenceable &createForPosition(const IRPosition &IRP,
                                              Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AADereferenceable"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AADereferenceable
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3555 | |||||||||
/// Integer state used to track alignment, parameterized with bounds
/// Value::MaximumAlignment and 1 (see IncIntegerState for their roles as
/// best/worst state — TODO confirm parameter order against IncIntegerState).
using AAAlignmentStateType =
    IncIntegerState<uint64_t, Value::MaximumAlignment, 1>;
/// An abstract interface for all align attributes.
struct AAAlign : public IRAttribute<
                     Attribute::Alignment,
                     StateWrapper<AAAlignmentStateType, AbstractAttribute>> {
  AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return assumed alignment.
  uint64_t getAssumedAlign() const { return getAssumed(); }

  /// Return known alignment.
  uint64_t getKnownAlign() const { return getKnown(); }

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAAlign"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AAAlign
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A);

  /// Unique ID (due to the unique address)
  static const char ID;
};
3587 | |||||||||
/// An abstract interface for all nocapture attributes.
struct AANoCapture
    : public IRAttribute<
          Attribute::NoCapture,
          StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> {
  AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// State encoding bits. A set bit in the state means the property holds.
  /// NO_CAPTURE is the best possible state, 0 the worst possible state.
  /// Note: NO_CAPTURE (= 7) matches the BitIntegerState best-state template
  /// argument above.
  enum {
    NOT_CAPTURED_IN_MEM = 1 << 0,
    NOT_CAPTURED_IN_INT = 1 << 1,
    NOT_CAPTURED_IN_RET = 1 << 2,

    /// If we do not capture the value in memory or through integers we can only
    /// communicate it back as a derived pointer.
    NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,

    /// If we do not capture the value in memory, through integers, or as a
    /// derived pointer we know it is not captured.
    NO_CAPTURE =
        NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
  };

  /// Return true if we know that the underlying value is not captured in its
  /// respective scope.
  bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); }

  /// Return true if we assume that the underlying value is not captured in its
  /// respective scope.
  bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); }

  /// Return true if we know that the underlying value is not captured in its
  /// respective scope but we allow it to escape through a "return".
  bool isKnownNoCaptureMaybeReturned() const {
    return isKnown(NO_CAPTURE_MAYBE_RETURNED);
  }

  /// Return true if we assume that the underlying value is not captured in its
  /// respective scope but we allow it to escape through a "return".
  bool isAssumedNoCaptureMaybeReturned() const {
    return isAssumed(NO_CAPTURE_MAYBE_RETURNED);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoCapture"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoCapture
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3649 | |||||||||
3650 | struct ValueSimplifyStateType : public AbstractState { | ||||||||
3651 | |||||||||
3652 | ValueSimplifyStateType(Type *Ty) : Ty(Ty) {} | ||||||||
3653 | |||||||||
3654 | static ValueSimplifyStateType getBestState(Type *Ty) { | ||||||||
3655 | return ValueSimplifyStateType(Ty); | ||||||||
3656 | } | ||||||||
3657 | static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) { | ||||||||
3658 | return getBestState(VS.Ty); | ||||||||
3659 | } | ||||||||
3660 | |||||||||
3661 | /// Return the worst possible representable state. | ||||||||
3662 | static ValueSimplifyStateType getWorstState(Type *Ty) { | ||||||||
3663 | ValueSimplifyStateType DS(Ty); | ||||||||
3664 | DS.indicatePessimisticFixpoint(); | ||||||||
3665 | return DS; | ||||||||
3666 | } | ||||||||
3667 | static ValueSimplifyStateType | ||||||||
3668 | getWorstState(const ValueSimplifyStateType &VS) { | ||||||||
3669 | return getWorstState(VS.Ty); | ||||||||
3670 | } | ||||||||
3671 | |||||||||
3672 | /// See AbstractState::isValidState(...) | ||||||||
3673 | bool isValidState() const override { return BS.isValidState(); } | ||||||||
3674 | |||||||||
3675 | /// See AbstractState::isAtFixpoint(...) | ||||||||
3676 | bool isAtFixpoint() const override { return BS.isAtFixpoint(); } | ||||||||
3677 | |||||||||
3678 | /// Return the assumed state encoding. | ||||||||
3679 | ValueSimplifyStateType getAssumed() { return *this; } | ||||||||
3680 | const ValueSimplifyStateType &getAssumed() const { return *this; } | ||||||||
3681 | |||||||||
3682 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
3683 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
3684 | return BS.indicatePessimisticFixpoint(); | ||||||||
3685 | } | ||||||||
3686 | |||||||||
3687 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
3688 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
3689 | return BS.indicateOptimisticFixpoint(); | ||||||||
3690 | } | ||||||||
3691 | |||||||||
3692 | /// "Clamp" this state with \p PVS. | ||||||||
3693 | ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) { | ||||||||
3694 | BS ^= VS.BS; | ||||||||
3695 | unionAssumed(VS.SimplifiedAssociatedValue); | ||||||||
3696 | return *this; | ||||||||
3697 | } | ||||||||
3698 | |||||||||
3699 | bool operator==(const ValueSimplifyStateType &RHS) const { | ||||||||
3700 | if (isValidState() != RHS.isValidState()) | ||||||||
3701 | return false; | ||||||||
3702 | if (!isValidState() && !RHS.isValidState()) | ||||||||
3703 | return true; | ||||||||
3704 | return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue; | ||||||||
3705 | } | ||||||||
3706 | |||||||||
3707 | protected: | ||||||||
3708 | /// The type of the original value. | ||||||||
3709 | Type *Ty; | ||||||||
3710 | |||||||||
3711 | /// Merge \p Other into the currently assumed simplified value | ||||||||
3712 | bool unionAssumed(Optional<Value *> Other); | ||||||||
3713 | |||||||||
3714 | /// Helper to track validity and fixpoint | ||||||||
3715 | BooleanState BS; | ||||||||
3716 | |||||||||
3717 | /// An assumed simplified value. Initially, it is set to Optional::None, which | ||||||||
3718 | /// means that the value is not clear under current assumption. If in the | ||||||||
3719 | /// pessimistic state, getAssumedSimplifiedValue doesn't return this value but | ||||||||
3720 | /// returns orignal associated value. | ||||||||
3721 | Optional<Value *> SimplifiedAssociatedValue; | ||||||||
3722 | }; | ||||||||
3723 | |||||||||
/// An abstract interface for the value simplify abstract attribute.
struct AAValueSimplify
    : public StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *> {
  using Base = StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *>;
  // The state is seeded with the type of the value associated with \p IRP.
  AAValueSimplify(const IRPosition &IRP, Attributor &A)
      : Base(IRP, IRP.getAssociatedType()) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAValueSimplify &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAValueSimplify"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAValueSimplify
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;

private:
  /// Return an assumed simplified value if a single candidate is found. If
  /// there cannot be one, return the original value. If it is not clear yet,
  /// return Optional::NoneType.
  ///
  /// Use `Attributor::getAssumedSimplified` for value simplification.
  virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0;

  friend struct Attributor;
};
3760 | |||||||||
/// An abstract interface for heap-to-stack conversion.
struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Returns true if HeapToStack conversion is assumed to be possible for the
  /// call site \p CB.
  virtual bool isAssumedHeapToStack(const CallBase &CB) const = 0;

  /// Returns true if HeapToStack conversion is assumed and the CB is a
  /// callsite to a free operation to be removed.
  virtual bool isAssumedHeapToStackRemovedFree(CallBase &CB) const = 0;

  /// Create an abstract attribute view for the position \p IRP.
  static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAHeapToStack"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AAHeapToStack
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3789 | |||||||||
/// An abstract interface for privatizability.
///
/// A pointer is privatizable if it can be replaced by a new, private one.
/// Privatizing a pointer reduces the use count and the interaction between
/// unrelated code parts.
///
/// In order for a pointer to be privatizable its value cannot be observed
/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
/// what values are necessary to make the private copy look like the original
/// one, and the values we need can be loaded (=dereferenceable).
struct AAPrivatizablePtr
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Returns true if pointer privatization is assumed to be possible.
  bool isAssumedPrivatizablePtr() const { return getAssumed(); }

  /// Returns true if pointer privatization is known to be possible.
  bool isKnownPrivatizablePtr() const { return getKnown(); }

  /// Return the type we can choose for a private copy of the underlying
  /// value. None means it is not clear yet, nullptr means there is none.
  virtual Optional<Type *> getPrivatizableType() const = 0;

  /// Create an abstract attribute view for the position \p IRP.
  static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
                                              Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAPrivatizablePtr"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAPrivatizablePtr
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3834 | |||||||||
/// An abstract interface for memory access kind related attributes
/// (readnone/readonly/writeonly).
struct AAMemoryBehavior
    : public IRAttribute<
          Attribute::ReadNone,
          StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> {
  AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// State encoding bits. A set bit in the state means the property holds.
  /// BEST_STATE is the best possible state, 0 the worst possible state.
  enum {
    NO_READS = 1 << 0,
    NO_WRITES = 1 << 1,
    NO_ACCESSES = NO_READS | NO_WRITES,

    BEST_STATE = NO_ACCESSES,
  };
  // Keep the enum encoding in sync with the BitIntegerState best state (3).
  static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");

  /// Return true if we know that the underlying value is not read or accessed
  /// in its respective scope.
  bool isKnownReadNone() const { return isKnown(NO_ACCESSES); }

  /// Return true if we assume that the underlying value is not read or accessed
  /// in its respective scope.
  bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); }

  /// Return true if we know that the underlying value is not accessed
  /// (=written) in its respective scope.
  bool isKnownReadOnly() const { return isKnown(NO_WRITES); }

  /// Return true if we assume that the underlying value is not accessed
  /// (=written) in its respective scope.
  bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); }

  /// Return true if we know that the underlying value is not read in its
  /// respective scope.
  bool isKnownWriteOnly() const { return isKnown(NO_READS); }

  /// Return true if we assume that the underlying value is not read in its
  /// respective scope.
  bool isAssumedWriteOnly() const { return isAssumed(NO_READS); }

  /// Create an abstract attribute view for the position \p IRP.
  static AAMemoryBehavior &createForPosition(const IRPosition &IRP,
                                             Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAMemoryBehavior"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAMemoryBehavior
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3897 | |||||||||
3898 | /// An abstract interface for all memory location attributes | ||||||||
3899 | /// (readnone/argmemonly/inaccessiblememonly/inaccessibleorargmemonly). | ||||||||
3900 | struct AAMemoryLocation | ||||||||
3901 | : public IRAttribute< | ||||||||
3902 | Attribute::ReadNone, | ||||||||
3903 | StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> { | ||||||||
3904 | using MemoryLocationsKind = StateType::base_t; | ||||||||
3905 | |||||||||
3906 | AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
3907 | |||||||||
3908 | /// Encoding of different locations that could be accessed by a memory | ||||||||
3909 | /// access. | ||||||||
3910 | enum { | ||||||||
3911 | ALL_LOCATIONS = 0, | ||||||||
3912 | NO_LOCAL_MEM = 1 << 0, | ||||||||
3913 | NO_CONST_MEM = 1 << 1, | ||||||||
3914 | NO_GLOBAL_INTERNAL_MEM = 1 << 2, | ||||||||
3915 | NO_GLOBAL_EXTERNAL_MEM = 1 << 3, | ||||||||
3916 | NO_GLOBAL_MEM = NO_GLOBAL_INTERNAL_MEM | NO_GLOBAL_EXTERNAL_MEM, | ||||||||
3917 | NO_ARGUMENT_MEM = 1 << 4, | ||||||||
3918 | NO_INACCESSIBLE_MEM = 1 << 5, | ||||||||
3919 | NO_MALLOCED_MEM = 1 << 6, | ||||||||
3920 | NO_UNKOWN_MEM = 1 << 7, | ||||||||
3921 | NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM | | ||||||||
3922 | NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM | | ||||||||
3923 | NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM, | ||||||||
3924 | |||||||||
3925 | // Helper bit to track if we gave up or not. | ||||||||
3926 | VALID_STATE = NO_LOCATIONS + 1, | ||||||||
3927 | |||||||||
3928 | BEST_STATE = NO_LOCATIONS | VALID_STATE, | ||||||||
3929 | }; | ||||||||
3930 | static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value"); | ||||||||
3931 | |||||||||
/// Return true if we know that the associated function has no observable
/// accesses.
bool isKnownReadNone() const { return isKnown(NO_LOCATIONS); }
3935 | |||||||||
/// Return true if we assume that the associated function has no observable
/// accesses. Stack-only accesses are treated as unobservable from the
/// outside, hence they count as read-none here.
bool isAssumedReadNone() const {
  return isAssumed(NO_LOCATIONS) || isAssumedStackOnly();
}
3941 | |||||||||
/// Return true if we know that the associated function has at most
/// local/stack accesses.
/// NOTE(review): the name "isKnowStackOnly" (missing 'n') is part of the
/// public interface and is therefore kept as-is.
bool isKnowStackOnly() const {
  return isKnown(inverseLocation(NO_LOCAL_MEM, true, true));
}
3947 | |||||||||
3948 | /// Return true if we assume that the associated functions has at most | ||||||||
3949 | /// local/stack accesses. | ||||||||
3950 | bool isAssumedStackOnly() const { | ||||||||
3951 | return isAssumed(inverseLocation(NO_LOCAL_MEM, true, true)); | ||||||||
3952 | } | ||||||||
3953 | |||||||||
/// Return true if we know that the underlying value will only access
/// inaccessible memory (see Attribute::InaccessibleMemOnly).
bool isKnownInaccessibleMemOnly() const {
  return isKnown(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
}
3959 | |||||||||
/// Return true if we assume that the underlying value will only access
/// inaccessible memory (see Attribute::InaccessibleMemOnly).
bool isAssumedInaccessibleMemOnly() const {
  return isAssumed(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
}
3965 | |||||||||
  /// Return true if we know that the underlying value will only access
  /// argument pointees (see Attribute::ArgMemOnly).
  bool isKnownArgMemOnly() const {
    return isKnown(inverseLocation(NO_ARGUMENT_MEM, true, true));
  }
3971 | |||||||||
  /// Return true if we assume that the underlying value will only access
  /// argument pointees (see Attribute::ArgMemOnly).
  bool isAssumedArgMemOnly() const {
    return isAssumed(inverseLocation(NO_ARGUMENT_MEM, true, true));
  }
3977 | |||||||||
  /// Return true if we know that the underlying value will only access
  /// inaccessible memory or argument pointees (see
  /// Attribute::InaccessibleOrArgMemOnly).
  bool isKnownInaccessibleOrArgMemOnly() const {
    return isKnown(
        inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
  }
3985 | |||||||||
  /// Return true if we assume that the underlying value will only access
  /// inaccessible memory or argument pointees (see
  /// Attribute::InaccessibleOrArgMemOnly).
  bool isAssumedInaccessibleOrArgMemOnly() const {
    return isAssumed(
        inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
  }
3993 | |||||||||
  /// Return true if the underlying value may access memory through argument
  /// pointers of the associated function, if any.
  bool mayAccessArgMem() const { return !isAssumed(NO_ARGUMENT_MEM); }
3997 | |||||||||
  /// Return true if only the memory locations specified by \p MLK are assumed
  /// to be accessed by the associated function.
  bool isAssumedSpecifiedMemOnly(MemoryLocationsKind MLK) const {
    return isAssumed(MLK);
  }
4003 | |||||||||
  /// Return the locations that are assumed to be not accessed by the
  /// associated function, if any.
  MemoryLocationsKind getAssumedNotAccessedLocation() const {
    return getAssumed();
  }
4009 | |||||||||
4010 | /// Return the inverse of location \p Loc, thus for NO_XXX the return | ||||||||
4011 | /// describes ONLY_XXX. The flags \p AndLocalMem and \p AndConstMem determine | ||||||||
4012 | /// if local (=stack) and constant memory are allowed as well. Most of the | ||||||||
4013 | /// time we do want them to be included, e.g., argmemonly allows accesses via | ||||||||
4014 | /// argument pointers or local or constant memory accesses. | ||||||||
4015 | static MemoryLocationsKind | ||||||||
4016 | inverseLocation(MemoryLocationsKind Loc, bool AndLocalMem, bool AndConstMem) { | ||||||||
4017 | return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) | | ||||||||
4018 | (AndConstMem ? NO_CONST_MEM : 0)); | ||||||||
4019 | }; | ||||||||
4020 | |||||||||
  /// Return the locations encoded by \p MLK as a human-readable string.
  static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK);
4023 | |||||||||
  /// Simple enum to distinguish read/write/read-write accesses.
  /// Values are bit flags so READ_WRITE is the union of READ and WRITE.
  enum AccessKind {
    NONE = 0,
    READ = 1 << 0,
    WRITE = 1 << 1,
    READ_WRITE = READ | WRITE,
  };
4031 | |||||||||
  /// Check \p Pred on all accesses to the memory kinds specified by \p MLK.
  ///
  /// This method will evaluate \p Pred on all accesses (access instruction +
  /// underlying accessed memory pointer) and it will return true if \p Pred
  /// holds every time. \p Pred receives the accessing instruction, the
  /// accessed pointer, the access kind, and the memory location kind.
  virtual bool checkForAllAccessesToMemoryKind(
      function_ref<bool(const Instruction *, const Value *, AccessKind,
                        MemoryLocationsKind)>
          Pred,
      MemoryLocationsKind MLK) const = 0;
4042 | |||||||||
  /// Create an abstract attribute view for the position \p IRP.
  static AAMemoryLocation &createForPosition(const IRPosition &IRP,
                                             Attributor &A);
4046 | |||||||||
  /// See AbstractState::getAsStr().
  /// Renders the set of assumed-not-accessed locations as a string.
  const std::string getAsStr() const override {
    return getMemoryLocationsAsStr(getAssumedNotAccessedLocation());
  }
4051 | |||||||||
  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAMemoryLocation"; }
4054 | |||||||||
  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }
4057 | |||||||||
  /// This function should return true if the type of the \p AA is
  /// AAMemoryLocation (identified by the unique address of ID).
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }
4063 | |||||||||
  /// Unique ID (its address, not its value, identifies this attribute kind)
  static const char ID;
4066 | }; | ||||||||
4067 | |||||||||
/// An abstract interface for range value analysis.
struct AAValueConstantRange
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  // The range state is seeded with the bit width of the associated (integer)
  // typed value.
  AAValueConstantRange(const IRPosition &IRP, Attributor &A)
      : Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  /// Create an abstract attribute view for the position \p IRP.
  static AAValueConstantRange &createForPosition(const IRPosition &IRP,
                                                 Attributor &A);

  /// Return an assumed range for the associated value at a program point
  /// \p CtxI. If \p CtxI is nullptr, simply return an assumed range.
  virtual ConstantRange
  getAssumedConstantRange(Attributor &A,
                          const Instruction *CtxI = nullptr) const = 0;

  /// Return a known range for the associated value at a program point
  /// \p CtxI. If \p CtxI is nullptr, simply return a known range.
  virtual ConstantRange
  getKnownConstantRange(Attributor &A,
                        const Instruction *CtxI = nullptr) const = 0;

  /// Return an assumed constant for the associated value at a program point
  /// \p CtxI. The result is a tri-state:
  ///  - a ConstantInt if the assumed range contains exactly one element,
  ///  - llvm::None if the assumed range is empty (no feasible value),
  ///  - nullptr if the value cannot be narrowed to a single constant.
  Optional<ConstantInt *>
  getAssumedConstantInt(Attributor &A,
                        const Instruction *CtxI = nullptr) const {
    ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
    if (auto *C = RangeV.getSingleElement())
      return cast<ConstantInt>(
          ConstantInt::get(getAssociatedValue().getType(), *C));
    if (RangeV.isEmptySet())
      return llvm::None;
    return nullptr;
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAValueConstantRange"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAValueConstantRange
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
4124 | |||||||||
/// A class for a set state.
/// The assumed boolean state indicates whether the corresponding set is full
/// set or not. If the assumed state is false, this is the worst state. The
/// worst state (invalid state) of set of potential values is when the set
/// contains every possible value (i.e. we cannot in any way limit the value
/// that the target position can take). That never happens naturally, we only
/// force it. As for the conditions under which we force it, see
/// AAPotentialValues.
template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
struct PotentialValuesState : AbstractState {
  using SetTy = DenseSet<MemberTy, KeyInfo>;

  // Default: best state -- a valid, empty set that does not contain undef.
  PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}

  // Construct either a valid (empty set) or invalid (conceptually full set)
  // state, depending on \p IsValid.
  PotentialValuesState(bool IsValid)
      : IsValidState(IsValid), UndefIsContained(false) {}

  /// See AbstractState::isValidState(...)
  bool isValidState() const override { return IsValidState.isValidState(); }

  /// See AbstractState::isAtFixpoint(...)
  bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); }

  /// See AbstractState::indicatePessimisticFixpoint(...)
  ChangeStatus indicatePessimisticFixpoint() override {
    return IsValidState.indicatePessimisticFixpoint();
  }

  /// See AbstractState::indicateOptimisticFixpoint(...)
  ChangeStatus indicateOptimisticFixpoint() override {
    return IsValidState.indicateOptimisticFixpoint();
  }

  /// Return the assumed state
  PotentialValuesState &getAssumed() { return *this; }
  const PotentialValuesState &getAssumed() const { return *this; }

  /// Return this set. We should check whether this set is valid or not by
  /// isValidState() before calling this function.
  const SetTy &getAssumedSet() const {
    assert(isValidState() && "This set shoud not be used when it is invalid!")(static_cast <bool> (isValidState() && "This set shoud not be used when it is invalid!" ) ? void (0) : __assert_fail ("isValidState() && \"This set shoud not be used when it is invalid!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 4165, __extension__ __PRETTY_FUNCTION__));
    return Set;
  }

  /// Returns whether this state contains an undef value or not.
  bool undefIsContained() const {
    assert(isValidState() && "This flag shoud not be used when it is invalid!")(static_cast <bool> (isValidState() && "This flag shoud not be used when it is invalid!" ) ? void (0) : __assert_fail ("isValidState() && \"This flag shoud not be used when it is invalid!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 4171, __extension__ __PRETTY_FUNCTION__));
    return UndefIsContained;
  }

  // Two invalid states compare equal regardless of their set contents; valid
  // states compare their undef flag and the sets themselves.
  bool operator==(const PotentialValuesState &RHS) const {
    if (isValidState() != RHS.isValidState())
      return false;
    if (!isValidState() && !RHS.isValidState())
      return true;
    if (undefIsContained() != RHS.undefIsContained())
      return false;
    return Set == RHS.getAssumedSet();
  }

  /// Maximum number of potential values to be tracked.
  /// This is set by -attributor-max-potential-values command line option
  static unsigned MaxPotentialValues;

  /// Return empty set as the best state of potential values.
  static PotentialValuesState getBestState() {
    return PotentialValuesState(true);
  }

  static PotentialValuesState getBestState(PotentialValuesState &PVS) {
    return getBestState();
  }

  /// Return full set as the worst state of potential values.
  static PotentialValuesState getWorstState() {
    return PotentialValuesState(false);
  }

  /// Union assumed set with the passed value.
  void unionAssumed(const MemberTy &C) { insert(C); }

  /// Union assumed set with assumed set of the passed state \p PVS.
  void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); }

  /// Union assumed set with an undef value.
  void unionAssumedWithUndef() { unionWithUndef(); }

  /// "Clamp" this state with \p PVS.
  /// NOTE(review): both operator^= and operator&= union the sets; they differ
  /// only in how the validity bit is combined (^= vs &= on BooleanState).
  PotentialValuesState operator^=(const PotentialValuesState &PVS) {
    IsValidState ^= PVS.IsValidState;
    unionAssumed(PVS);
    return *this;
  }

  PotentialValuesState operator&=(const PotentialValuesState &PVS) {
    IsValidState &= PVS.IsValidState;
    unionAssumed(PVS);
    return *this;
  }

private:
  /// Check the size of this set, and invalidate when the size is no
  /// less than \p MaxPotentialValues threshold.
  void checkAndInvalidate() {
    if (Set.size() >= MaxPotentialValues)
      indicatePessimisticFixpoint();
    else
      reduceUndefValue();
  }

  /// If this state contains both undef and not undef, we can reduce
  /// undef to the not undef value.
  void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); }

  /// Insert an element into this set.
  void insert(const MemberTy &C) {
    if (!isValidState())
      return;
    Set.insert(C);
    checkAndInvalidate();
  }

  /// Take union with R.
  void unionWith(const PotentialValuesState &R) {
    /// If this is a full set, do nothing.
    if (!isValidState())
      return;
    /// If R is full set, change L to a full set.
    if (!R.isValidState()) {
      indicatePessimisticFixpoint();
      return;
    }
    for (const MemberTy &C : R.Set)
      Set.insert(C);
    UndefIsContained |= R.undefIsContained();
    checkAndInvalidate();
  }

  /// Take union with an undef value.
  void unionWithUndef() {
    UndefIsContained = true;
    reduceUndefValue();
  }

  /// Take intersection with R.
  void intersectWith(const PotentialValuesState &R) {
    /// If R is a full set, do nothing.
    if (!R.isValidState())
      return;
    /// If this is a full set, change this to R.
    if (!isValidState()) {
      *this = R;
      return;
    }
    SetTy IntersectSet;
    for (const MemberTy &C : Set) {
      if (R.Set.count(C))
        IntersectSet.insert(C);
    }
    Set = IntersectSet;
    UndefIsContained &= R.undefIsContained();
    reduceUndefValue();
  }

  /// A helper state which indicate whether this state is valid or not.
  BooleanState IsValidState;

  /// Container for potential values
  SetTy Set;

  /// Flag for undef value
  bool UndefIsContained;
};
4298 | |||||||||
4299 | using PotentialConstantIntValuesState = PotentialValuesState<APInt>; | ||||||||
4300 | |||||||||
4301 | raw_ostream &operator<<(raw_ostream &OS, | ||||||||
4302 | const PotentialConstantIntValuesState &R); | ||||||||
4303 | |||||||||
/// An abstract interface for potential values analysis.
///
/// This AA collects potential values for each IR position.
/// An assumed set of potential values is initialized with the empty set (the
/// best state) and it will grow monotonically as we find more potential values
/// for this position.
/// The set might be forced to the worst state, that is, to contain every
/// possible value for this position in 2 cases.
/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the
///    case that this position is affected (e.g. because of an operation) by a
///    Value that is in the worst state.
/// 2. We tried to initialize on a Value that we cannot handle (e.g. an
///    operator we do not currently handle).
///
/// TODO: Support values other than constant integers.
struct AAPotentialValues
    : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
  using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
  AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// See AbstractAttribute::getState(...).
  PotentialConstantIntValuesState &getState() override { return *this; }
  const PotentialConstantIntValuesState &getState() const override {
    return *this;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAPotentialValues &createForPosition(const IRPosition &IRP,
                                              Attributor &A);

  /// Return assumed constant for the associated value. Tri-state result:
  ///  - a ConstantInt if the assumed set has exactly one element, or is empty
  ///    but contains undef (undef is concretized to 0),
  ///  - llvm::None if the assumed set is empty without undef (no value),
  ///  - nullptr if the state is invalid or more than one value remains.
  Optional<ConstantInt *>
  getAssumedConstantInt(Attributor &A,
                        const Instruction *CtxI = nullptr) const {
    if (!isValidState())
      return nullptr;
    if (getAssumedSet().size() == 1)
      return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
                                                *(getAssumedSet().begin())));
    if (getAssumedSet().size() == 0) {
      if (undefIsContained())
        return cast<ConstantInt>(
            ConstantInt::get(getAssociatedValue().getType(), 0));
      return llvm::None;
    }

    return nullptr;
  }

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAPotentialValues"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAPotentialValues
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
4368 | |||||||||
/// An abstract interface for all noundef attributes.
struct AANoUndef
    : public IRAttribute<Attribute::NoUndef,
                         StateWrapper<BooleanState, AbstractAttribute>> {
  AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Return true if we assume that the underlying value is noundef.
  bool isAssumedNoUndef() const { return getAssumed(); }

  /// Return true if we know that the underlying value is noundef.
  bool isKnownNoUndef() const { return getKnown(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AANoUndef"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AANoUndef
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
4398 | |||||||||
4399 | struct AACallGraphNode; | ||||||||
4400 | struct AACallEdges; | ||||||||
4401 | |||||||||
/// An Iterator for call edges, creates AACallEdges attributes in a lazy way.
/// This iterator becomes invalid if the underlying edge list changes.
/// Therefore it should not outlive an iteration of the Attributor.
class AACallEdgeIterator
    : public iterator_adaptor_base<AACallEdgeIterator,
                                   SetVector<Function *>::iterator> {
  // Private constructor: only the friends below may create instances.
  AACallEdgeIterator(Attributor &A, SetVector<Function *>::iterator Begin)
      : iterator_adaptor_base(Begin), A(A) {}

public:
  AACallGraphNode *operator*() const;

private:
  // Needed to create AACallEdges attributes on dereference.
  Attributor &A;
  friend AACallEdges;
  friend AttributorCallGraph;
};
4419 | |||||||||
4420 | struct AACallGraphNode { | ||||||||
4421 | AACallGraphNode(Attributor &A) : A(A) {} | ||||||||
4422 | virtual ~AACallGraphNode() {} | ||||||||
4423 | |||||||||
4424 | virtual AACallEdgeIterator optimisticEdgesBegin() const = 0; | ||||||||
4425 | virtual AACallEdgeIterator optimisticEdgesEnd() const = 0; | ||||||||
4426 | |||||||||
4427 | /// Iterator range for exploring the call graph. | ||||||||
4428 | iterator_range<AACallEdgeIterator> optimisticEdgesRange() const { | ||||||||
4429 | return iterator_range<AACallEdgeIterator>(optimisticEdgesBegin(), | ||||||||
4430 | optimisticEdgesEnd()); | ||||||||
4431 | } | ||||||||
4432 | |||||||||
4433 | protected: | ||||||||
4434 | /// Reference to Attributor needed for GraphTraits implementation. | ||||||||
4435 | Attributor &A; | ||||||||
4436 | }; | ||||||||
4437 | |||||||||
/// An abstract state for querying live call edges.
/// This interface uses the Attributor's optimistic liveness
/// information to compute the edges that are alive.
struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
                     AACallGraphNode {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;

  AACallEdges(const IRPosition &IRP, Attributor &A)
      : Base(IRP), AACallGraphNode(A) {}

  /// Get the optimistic edges.
  virtual const SetVector<Function *> &getOptimisticEdges() const = 0;

  /// Is there any call with an unknown callee.
  virtual bool hasUnknownCallee() const = 0;

  /// Is there any call with an unknown callee, excluding any inline asm.
  virtual bool hasNonAsmUnknownCallee() const = 0;

  /// Iterator for exploring the call graph.
  AACallEdgeIterator optimisticEdgesBegin() const override {
    return AACallEdgeIterator(A, getOptimisticEdges().begin());
  }

  /// Iterator for exploring the call graph.
  AACallEdgeIterator optimisticEdgesEnd() const override {
    return AACallEdgeIterator(A, getOptimisticEdges().end());
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AACallEdges &createForPosition(const IRPosition &IRP, Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AACallEdges"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AACallEdges.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
4484 | |||||||||
4485 | // Synthetic root node for the Attributor's internal call graph. | ||||||||
4486 | struct AttributorCallGraph : public AACallGraphNode { | ||||||||
4487 | AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {} | ||||||||
4488 | virtual ~AttributorCallGraph() {} | ||||||||
4489 | |||||||||
4490 | AACallEdgeIterator optimisticEdgesBegin() const override { | ||||||||
4491 | return AACallEdgeIterator(A, A.Functions.begin()); | ||||||||
4492 | } | ||||||||
4493 | |||||||||
4494 | AACallEdgeIterator optimisticEdgesEnd() const override { | ||||||||
4495 | return AACallEdgeIterator(A, A.Functions.end()); | ||||||||
4496 | } | ||||||||
4497 | |||||||||
4498 | /// Force populate the entire call graph. | ||||||||
4499 | void populateAll() const { | ||||||||
4500 | for (const AACallGraphNode *AA : optimisticEdgesRange()) { | ||||||||
4501 | // Nothing else to do here. | ||||||||
4502 | (void)AA; | ||||||||
4503 | } | ||||||||
4504 | } | ||||||||
4505 | |||||||||
4506 | void print(); | ||||||||
4507 | }; | ||||||||
4508 | |||||||||
/// GraphTraits specialization so generic graph utilities (e.g. GraphWriter)
/// can traverse the Attributor call graph node by node.
template <> struct GraphTraits<AACallGraphNode *> {
  using NodeRef = AACallGraphNode *;
  using ChildIteratorType = AACallEdgeIterator;

  static AACallEdgeIterator child_begin(AACallGraphNode *Node) {
    return Node->optimisticEdgesBegin();
  }

  static AACallEdgeIterator child_end(AACallGraphNode *Node) {
    return Node->optimisticEdgesEnd();
  }
};
4521 | |||||||||
4522 | template <> | ||||||||
4523 | struct GraphTraits<AttributorCallGraph *> | ||||||||
4524 | : public GraphTraits<AACallGraphNode *> { | ||||||||
4525 | using nodes_iterator = AACallEdgeIterator; | ||||||||
4526 | |||||||||
4527 | static AACallGraphNode *getEntryNode(AttributorCallGraph *G) { | ||||||||
4528 | return static_cast<AACallGraphNode *>(G); | ||||||||
4529 | } | ||||||||
4530 | |||||||||
4531 | static AACallEdgeIterator nodes_begin(const AttributorCallGraph *G) { | ||||||||
4532 | return G->optimisticEdgesBegin(); | ||||||||
4533 | } | ||||||||
4534 | |||||||||
4535 | static AACallEdgeIterator nodes_end(const AttributorCallGraph *G) { | ||||||||
4536 | return G->optimisticEdgesEnd(); | ||||||||
4537 | } | ||||||||
4538 | }; | ||||||||
4539 | |||||||||
4540 | template <> | ||||||||
4541 | struct DOTGraphTraits<AttributorCallGraph *> : public DefaultDOTGraphTraits { | ||||||||
4542 | DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {} | ||||||||
4543 | |||||||||
4544 | std::string getNodeLabel(const AACallGraphNode *Node, | ||||||||
4545 | const AttributorCallGraph *Graph) { | ||||||||
4546 | const AACallEdges *AACE = static_cast<const AACallEdges *>(Node); | ||||||||
4547 | return AACE->getAssociatedFunction()->getName().str(); | ||||||||
4548 | } | ||||||||
4549 | |||||||||
4550 | static bool isNodeHidden(const AACallGraphNode *Node, | ||||||||
4551 | const AttributorCallGraph *Graph) { | ||||||||
4552 | // Hide the synth root. | ||||||||
4553 | return static_cast<const AACallGraphNode *>(Graph) == Node; | ||||||||
4554 | } | ||||||||
4555 | }; | ||||||||
4556 | |||||||||
4557 | struct AAExecutionDomain | ||||||||
4558 | : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
4559 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
4560 | AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
4561 | |||||||||
4562 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
4563 | static AAExecutionDomain &createForPosition(const IRPosition &IRP, | ||||||||
4564 | Attributor &A); | ||||||||
4565 | |||||||||
4566 | /// See AbstractAttribute::getName(). | ||||||||
4567 | const std::string getName() const override { return "AAExecutionDomain"; } | ||||||||
4568 | |||||||||
4569 | /// See AbstractAttribute::getIdAddr(). | ||||||||
4570 | const char *getIdAddr() const override { return &ID; } | ||||||||
4571 | |||||||||
4572 | /// Check if an instruction is executed only by the initial thread. | ||||||||
4573 | virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0; | ||||||||
4574 | |||||||||
4575 | /// Check if a basic block is executed only by the initial thread. | ||||||||
4576 | virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0; | ||||||||
4577 | |||||||||
4578 | /// This function should return true if the type of the \p AA is | ||||||||
4579 | /// AAExecutionDomain. | ||||||||
4580 | static bool classof(const AbstractAttribute *AA) { | ||||||||
4581 | return (AA->getIdAddr() == &ID); | ||||||||
4582 | } | ||||||||
4583 | |||||||||
4584 | /// Unique ID (due to the unique address) | ||||||||
4585 | static const char ID; | ||||||||
4586 | }; | ||||||||
4587 | |||||||||
4588 | /// An abstract Attribute for computing reachability between functions. | ||||||||
4589 | struct AAFunctionReachability | ||||||||
4590 | : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
4591 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
4592 | |||||||||
4593 | AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
4594 | |||||||||
4595 | /// If the function represented by this possition can reach \p Fn. | ||||||||
4596 | virtual bool canReach(Attributor &A, Function *Fn) const = 0; | ||||||||
4597 | |||||||||
4598 | /// Can \p CB reach \p Fn | ||||||||
4599 | virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0; | ||||||||
4600 | |||||||||
4601 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
4602 | static AAFunctionReachability &createForPosition(const IRPosition &IRP, | ||||||||
4603 | Attributor &A); | ||||||||
4604 | |||||||||
4605 | /// See AbstractAttribute::getName() | ||||||||
4606 | const std::string getName() const override { return "AAFuncitonReacability"; } | ||||||||
4607 | |||||||||
4608 | /// See AbstractAttribute::getIdAddr() | ||||||||
4609 | const char *getIdAddr() const override { return &ID; } | ||||||||
4610 | |||||||||
4611 | /// This function should return true if the type of the \p AA is AACallEdges. | ||||||||
4612 | static bool classof(const AbstractAttribute *AA) { | ||||||||
4613 | return (AA->getIdAddr() == &ID); | ||||||||
4614 | } | ||||||||
4615 | |||||||||
4616 | /// Unique ID (due to the unique address) | ||||||||
4617 | static const char ID; | ||||||||
4618 | |||||||||
4619 | private: | ||||||||
4620 | /// Can this function reach a call with unknown calee. | ||||||||
4621 | virtual bool canReachUnknownCallee() const = 0; | ||||||||
4622 | }; | ||||||||
4623 | |||||||||
4624 | /// An abstract interface for struct information. | ||||||||
4625 | struct AAPointerInfo : public AbstractAttribute { | ||||||||
4626 | AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {} | ||||||||
4627 | |||||||||
4628 | enum AccessKind { | ||||||||
4629 | AK_READ = 1 << 0, | ||||||||
4630 | AK_WRITE = 1 << 1, | ||||||||
4631 | AK_READ_WRITE = AK_READ | AK_WRITE, | ||||||||
4632 | }; | ||||||||
4633 | |||||||||
4634 | /// An access description. | ||||||||
4635 | struct Access { | ||||||||
4636 | Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty) | ||||||||
4637 | : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {} | ||||||||
4638 | Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content, | ||||||||
4639 | AccessKind Kind, Type *Ty) | ||||||||
4640 | : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), | ||||||||
4641 | Ty(Ty) {} | ||||||||
4642 | Access(const Access &Other) | ||||||||
4643 | : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), | ||||||||
4644 | Kind(Other.Kind), Ty(Other.Ty) {} | ||||||||
4645 | Access(const Access &&Other) | ||||||||
4646 | : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), | ||||||||
4647 | Kind(Other.Kind), Ty(Other.Ty) {} | ||||||||
4648 | |||||||||
4649 | Access &operator=(const Access &Other) { | ||||||||
4650 | LocalI = Other.LocalI; | ||||||||
4651 | RemoteI = Other.RemoteI; | ||||||||
4652 | Content = Other.Content; | ||||||||
4653 | Kind = Other.Kind; | ||||||||
4654 | Ty = Other.Ty; | ||||||||
4655 | return *this; | ||||||||
4656 | } | ||||||||
4657 | bool operator==(const Access &R) const { | ||||||||
4658 | return LocalI == R.LocalI && RemoteI == R.RemoteI && | ||||||||
4659 | Content == R.Content && Kind == R.Kind; | ||||||||
4660 | } | ||||||||
4661 | bool operator!=(const Access &R) const { return !(*this == R); } | ||||||||
4662 | |||||||||
4663 | Access &operator&=(const Access &R) { | ||||||||
4664 | assert(RemoteI == R.RemoteI && "Expected same instruction!")(static_cast <bool> (RemoteI == R.RemoteI && "Expected same instruction!" ) ? void (0) : __assert_fail ("RemoteI == R.RemoteI && \"Expected same instruction!\"" , "llvm/include/llvm/Transforms/IPO/Attributor.h", 4664, __extension__ __PRETTY_FUNCTION__)); | ||||||||
4665 | Content = | ||||||||
4666 | AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); | ||||||||
4667 | Kind = AccessKind(Kind | R.Kind); | ||||||||
4668 | return *this; | ||||||||
4669 | } | ||||||||
4670 | |||||||||
4671 | /// Return the access kind. | ||||||||
4672 | AccessKind getKind() const { return Kind; } | ||||||||
4673 | |||||||||
4674 | /// Return true if this is a read access. | ||||||||
4675 | bool isRead() const { return Kind & AK_READ; } | ||||||||
4676 | |||||||||
4677 | /// Return true if this is a write access. | ||||||||
4678 | bool isWrite() const { return Kind & AK_WRITE; } | ||||||||
4679 | |||||||||
4680 | /// Return the instruction that causes the access with respect to the local | ||||||||
4681 | /// scope of the associated attribute. | ||||||||
4682 | Instruction *getLocalInst() const { return LocalI; } | ||||||||
4683 | |||||||||
4684 | /// Return the actual instruction that causes the access. | ||||||||
4685 | Instruction *getRemoteInst() const { return RemoteI; } | ||||||||
4686 | |||||||||
4687 | /// Return true if the value written is not known yet. | ||||||||
4688 | bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); } | ||||||||
4689 | |||||||||
4690 | /// Return true if the value written cannot be determined at all. | ||||||||
4691 | bool isWrittenValueUnknown() const { | ||||||||
4692 | return Content.hasValue() && !*Content; | ||||||||
4693 | } | ||||||||
4694 | |||||||||
4695 | /// Return the type associated with the access, if known. | ||||||||
4696 | Type *getType() const { return Ty; } | ||||||||
4697 | |||||||||
4698 | /// Return the value writen, if any. As long as | ||||||||
4699 | /// isWrittenValueYetUndetermined return true this function shall not be | ||||||||
4700 | /// called. | ||||||||
4701 | Value *getWrittenValue() const { return *Content; } | ||||||||
4702 | |||||||||
4703 | /// Return the written value which can be `llvm::null` if it is not yet | ||||||||
4704 | /// determined. | ||||||||
4705 | Optional<Value *> getContent() const { return Content; } | ||||||||
4706 | |||||||||
4707 | private: | ||||||||
4708 | /// The instruction responsible for the access with respect to the local | ||||||||
4709 | /// scope of the associated attribute. | ||||||||
4710 | Instruction *LocalI; | ||||||||
4711 | |||||||||
4712 | /// The instruction responsible for the access. | ||||||||
4713 | Instruction *RemoteI; | ||||||||
4714 | |||||||||
4715 | /// The value written, if any. `llvm::none` means "not known yet", `nullptr` | ||||||||
4716 | /// cannot be determined. | ||||||||
4717 | Optional<Value *> Content; | ||||||||
4718 | |||||||||
4719 | /// The access kind, e.g., READ, as bitset (could be more than one). | ||||||||
4720 | AccessKind Kind; | ||||||||
4721 | |||||||||
4722 | /// The type of the content, thus the type read/written, can be null if not | ||||||||
4723 | /// available. | ||||||||
4724 | Type *Ty; | ||||||||
4725 | }; | ||||||||
4726 | |||||||||
4727 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
4728 | static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
4729 | |||||||||
4730 | /// See AbstractAttribute::getName() | ||||||||
4731 | const std::string getName() const override { return "AAPointerInfo"; } | ||||||||
4732 | |||||||||
4733 | /// See AbstractAttribute::getIdAddr() | ||||||||
4734 | const char *getIdAddr() const override { return &ID; } | ||||||||
4735 | |||||||||
4736 | /// Call \p CB on all accesses that might interfere with \p LI and return true | ||||||||
4737 | /// if all such accesses were known and the callback returned true for all of | ||||||||
4738 | /// them, false otherwise. | ||||||||
4739 | virtual bool forallInterferingAccesses( | ||||||||
4740 | LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0; | ||||||||
4741 | virtual bool forallInterferingAccesses( | ||||||||
4742 | StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0; | ||||||||
4743 | |||||||||
4744 | /// This function should return true if the type of the \p AA is AAPointerInfo | ||||||||
4745 | static bool classof(const AbstractAttribute *AA) { | ||||||||
4746 | return (AA->getIdAddr() == &ID); | ||||||||
4747 | } | ||||||||
4748 | |||||||||
4749 | /// Unique ID (due to the unique address) | ||||||||
4750 | static const char ID; | ||||||||
4751 | }; | ||||||||
4752 | |||||||||
/// An abstract attribute for getting assumption information.
struct AAAssumptionInfo
    : public StateWrapper<SetState<StringRef>, AbstractAttribute,
                          DenseSet<StringRef>> {
  using Base =
      StateWrapper<SetState<StringRef>, AbstractAttribute, DenseSet<StringRef>>;

  /// Seed the underlying set state with the \p Known assumption strings.
  AAAssumptionInfo(const IRPosition &IRP, Attributor &A,
                   const DenseSet<StringRef> &Known)
      : Base(IRP, Known) {}

  /// Returns true if the assumption set contains the assumption \p Assumption.
  virtual bool hasAssumption(const StringRef Assumption) const = 0;

  /// Create an abstract attribute view for the position \p IRP.
  static AAAssumptionInfo &createForPosition(const IRPosition &IRP,
                                             Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAAssumptionInfo"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAssumptionInfo
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
4786 | |||||||||
/// Debug printer for AAPointerInfo::Access (defined out of line).
raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);

/// Run options, used by the pass manager.
// Bitmask selecting which Attributor pass variants run.
enum AttributorRunOption {
  NONE = 0,             ///< Run neither variant.
  MODULE = 1 << 0,      ///< Run the module-level variant.
  CGSCC = 1 << 1,       ///< Run the CGSCC-level variant.
  ALL = MODULE | CGSCC  ///< Run both variants.
};
4796 | |||||||||
4797 | } // end namespace llvm | ||||||||
4798 | |||||||||
4799 | #endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H |