File: | llvm/lib/Transforms/IPO/OpenMPOpt.cpp |
Warning: | line 3771, column 9 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // OpenMP specific optimizations: | |||
10 | // | |||
11 | // - Deduplication of runtime calls, e.g., omp_get_thread_num. | |||
12 | // - Replacing globalized device memory with stack memory. | |||
13 | // - Replacing globalized device memory with shared memory. | |||
14 | // - Parallel region merging. | |||
15 | // - Transforming generic-mode device kernels to SPMD mode. | |||
16 | // - Specializing the state machine for generic-mode device kernels. | |||
17 | // | |||
18 | //===----------------------------------------------------------------------===// | |||
19 | ||||
20 | #include "llvm/Transforms/IPO/OpenMPOpt.h" | |||
21 | ||||
22 | #include "llvm/ADT/EnumeratedArray.h" | |||
23 | #include "llvm/ADT/PostOrderIterator.h" | |||
24 | #include "llvm/ADT/Statistic.h" | |||
25 | #include "llvm/ADT/StringRef.h" | |||
26 | #include "llvm/Analysis/CallGraph.h" | |||
27 | #include "llvm/Analysis/CallGraphSCCPass.h" | |||
28 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
29 | #include "llvm/Analysis/ValueTracking.h" | |||
30 | #include "llvm/Frontend/OpenMP/OMPConstants.h" | |||
31 | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" | |||
32 | #include "llvm/IR/Assumptions.h" | |||
33 | #include "llvm/IR/DiagnosticInfo.h" | |||
34 | #include "llvm/IR/GlobalValue.h" | |||
35 | #include "llvm/IR/Instruction.h" | |||
36 | #include "llvm/IR/IntrinsicInst.h" | |||
37 | #include "llvm/IR/IntrinsicsAMDGPU.h" | |||
38 | #include "llvm/IR/IntrinsicsNVPTX.h" | |||
39 | #include "llvm/InitializePasses.h" | |||
40 | #include "llvm/Support/CommandLine.h" | |||
41 | #include "llvm/Transforms/IPO.h" | |||
42 | #include "llvm/Transforms/IPO/Attributor.h" | |||
43 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
44 | #include "llvm/Transforms/Utils/CallGraphUpdater.h" | |||
45 | #include "llvm/Transforms/Utils/CodeExtractor.h" | |||
46 | ||||
47 | #include <algorithm> | |||
48 | ||||
49 | using namespace llvm; | |||
50 | using namespace omp; | |||
51 | ||||
// Debug/statistic category name used by LLVM_DEBUG and STATISTIC in this file.
#define DEBUG_TYPE "openmp-opt"
53 | ||||
54 | static cl::opt<bool> DisableOpenMPOptimizations( | |||
55 | "openmp-opt-disable", cl::ZeroOrMore, | |||
56 | cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, | |||
57 | cl::init(false)); | |||
58 | ||||
59 | static cl::opt<bool> EnableParallelRegionMerging( | |||
60 | "openmp-opt-enable-merging", cl::ZeroOrMore, | |||
61 | cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, | |||
62 | cl::init(false)); | |||
63 | ||||
64 | static cl::opt<bool> | |||
65 | DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore, | |||
66 | cl::desc("Disable function internalization."), | |||
67 | cl::Hidden, cl::init(false)); | |||
68 | ||||
69 | static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), | |||
70 | cl::Hidden); | |||
71 | static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", | |||
72 | cl::init(false), cl::Hidden); | |||
73 | ||||
74 | static cl::opt<bool> HideMemoryTransferLatency( | |||
75 | "openmp-hide-memory-transfer-latency", | |||
76 | cl::desc("[WIP] Tries to hide the latency of host to device memory" | |||
77 | " transfers"), | |||
78 | cl::Hidden, cl::init(false)); | |||
79 | ||||
80 | static cl::opt<bool> DisableOpenMPOptDeglobalization( | |||
81 | "openmp-opt-disable-deglobalization", cl::ZeroOrMore, | |||
82 | cl::desc("Disable OpenMP optimizations involving deglobalization."), | |||
83 | cl::Hidden, cl::init(false)); | |||
84 | ||||
85 | static cl::opt<bool> DisableOpenMPOptSPMDization( | |||
86 | "openmp-opt-disable-spmdization", cl::ZeroOrMore, | |||
87 | cl::desc("Disable OpenMP optimizations involving SPMD-ization."), | |||
88 | cl::Hidden, cl::init(false)); | |||
89 | ||||
90 | static cl::opt<bool> DisableOpenMPOptFolding( | |||
91 | "openmp-opt-disable-folding", cl::ZeroOrMore, | |||
92 | cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden, | |||
93 | cl::init(false)); | |||
94 | ||||
95 | static cl::opt<bool> DisableOpenMPOptStateMachineRewrite( | |||
96 | "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore, | |||
97 | cl::desc("Disable OpenMP optimizations that replace the state machine."), | |||
98 | cl::Hidden, cl::init(false)); | |||
99 | ||||
100 | static cl::opt<bool> PrintModuleAfterOptimizations( | |||
101 | "openmp-opt-print-module", cl::ZeroOrMore, | |||
102 | cl::desc("Print the current module after OpenMP optimizations."), | |||
103 | cl::Hidden, cl::init(false)); | |||
104 | ||||
105 | static cl::opt<bool> AlwaysInlineDeviceFunctions( | |||
106 | "openmp-opt-inline-device", cl::ZeroOrMore, | |||
107 | cl::desc("Inline all applicible functions on the device."), cl::Hidden, | |||
108 | cl::init(false)); | |||
109 | ||||
110 | static cl::opt<bool> | |||
111 | EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore, | |||
112 | cl::desc("Enables more verbose remarks."), cl::Hidden, | |||
113 | cl::init(false)); | |||
114 | ||||
115 | static cl::opt<unsigned> | |||
116 | SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden, | |||
117 | cl::desc("Maximal number of attributor iterations."), | |||
118 | cl::init(256)); | |||
119 | ||||
120 | STATISTIC(NumOpenMPRuntimeCallsDeduplicated,static llvm::Statistic NumOpenMPRuntimeCallsDeduplicated = {"openmp-opt" , "NumOpenMPRuntimeCallsDeduplicated", "Number of OpenMP runtime calls deduplicated" } | |||
121 | "Number of OpenMP runtime calls deduplicated")static llvm::Statistic NumOpenMPRuntimeCallsDeduplicated = {"openmp-opt" , "NumOpenMPRuntimeCallsDeduplicated", "Number of OpenMP runtime calls deduplicated" }; | |||
122 | STATISTIC(NumOpenMPParallelRegionsDeleted,static llvm::Statistic NumOpenMPParallelRegionsDeleted = {"openmp-opt" , "NumOpenMPParallelRegionsDeleted", "Number of OpenMP parallel regions deleted" } | |||
123 | "Number of OpenMP parallel regions deleted")static llvm::Statistic NumOpenMPParallelRegionsDeleted = {"openmp-opt" , "NumOpenMPParallelRegionsDeleted", "Number of OpenMP parallel regions deleted" }; | |||
124 | STATISTIC(NumOpenMPRuntimeFunctionsIdentified,static llvm::Statistic NumOpenMPRuntimeFunctionsIdentified = { "openmp-opt", "NumOpenMPRuntimeFunctionsIdentified", "Number of OpenMP runtime functions identified" } | |||
125 | "Number of OpenMP runtime functions identified")static llvm::Statistic NumOpenMPRuntimeFunctionsIdentified = { "openmp-opt", "NumOpenMPRuntimeFunctionsIdentified", "Number of OpenMP runtime functions identified" }; | |||
126 | STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,static llvm::Statistic NumOpenMPRuntimeFunctionUsesIdentified = {"openmp-opt", "NumOpenMPRuntimeFunctionUsesIdentified", "Number of OpenMP runtime function uses identified" } | |||
127 | "Number of OpenMP runtime function uses identified")static llvm::Statistic NumOpenMPRuntimeFunctionUsesIdentified = {"openmp-opt", "NumOpenMPRuntimeFunctionUsesIdentified", "Number of OpenMP runtime function uses identified" }; | |||
128 | STATISTIC(NumOpenMPTargetRegionKernels,static llvm::Statistic NumOpenMPTargetRegionKernels = {"openmp-opt" , "NumOpenMPTargetRegionKernels", "Number of OpenMP target region entry points (=kernels) identified" } | |||
129 | "Number of OpenMP target region entry points (=kernels) identified")static llvm::Statistic NumOpenMPTargetRegionKernels = {"openmp-opt" , "NumOpenMPTargetRegionKernels", "Number of OpenMP target region entry points (=kernels) identified" }; | |||
130 | STATISTIC(NumOpenMPTargetRegionKernelsSPMD,static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt" , "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"} | |||
131 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt" , "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"} | |||
132 | "SPMD-mode instead of generic-mode")static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt" , "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"}; | |||
133 | STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine = {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"} | |||
134 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine = {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"} | |||
135 | "generic-mode without a state machines")static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine = {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"}; | |||
136 | STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"} | |||
137 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"} | |||
138 | "generic-mode with customized state machines with fallback")static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"}; | |||
139 | STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback" } | |||
140 | "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback" } | |||
141 | "generic-mode with customized state machines without fallback")static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback = {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback" , "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback" }; | |||
142 | STATISTIC(static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine = {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine" , "Number of OpenMP parallel regions replaced with ID in GPU state machines" } | |||
143 | NumOpenMPParallelRegionsReplacedInGPUStateMachine,static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine = {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine" , "Number of OpenMP parallel regions replaced with ID in GPU state machines" } | |||
144 | "Number of OpenMP parallel regions replaced with ID in GPU state machines")static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine = {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine" , "Number of OpenMP parallel regions replaced with ID in GPU state machines" }; | |||
145 | STATISTIC(NumOpenMPParallelRegionsMerged,static llvm::Statistic NumOpenMPParallelRegionsMerged = {"openmp-opt" , "NumOpenMPParallelRegionsMerged", "Number of OpenMP parallel regions merged" } | |||
146 | "Number of OpenMP parallel regions merged")static llvm::Statistic NumOpenMPParallelRegionsMerged = {"openmp-opt" , "NumOpenMPParallelRegionsMerged", "Number of OpenMP parallel regions merged" }; | |||
147 | STATISTIC(NumBytesMovedToSharedMemory,static llvm::Statistic NumBytesMovedToSharedMemory = {"openmp-opt" , "NumBytesMovedToSharedMemory", "Amount of memory pushed to shared memory" } | |||
148 | "Amount of memory pushed to shared memory")static llvm::Statistic NumBytesMovedToSharedMemory = {"openmp-opt" , "NumBytesMovedToSharedMemory", "Amount of memory pushed to shared memory" }; | |||
149 | ||||
150 | #if !defined(NDEBUG) | |||
151 | static constexpr auto TAG = "[" DEBUG_TYPE"openmp-opt" "]"; | |||
152 | #endif | |||
153 | ||||
154 | namespace { | |||
155 | ||||
/// Numeric address space identifiers used by this file. The underlying values
/// are fixed; note that the value 2 is not assigned to any enumerator.
enum class AddressSpace : unsigned {
  Generic = 0,  // value 0
  Global = 1,   // value 1
  Shared = 3,   // value 3
  Constant = 4, // value 4
  Local = 5,    // value 5
};
163 | ||||
// Forward declarations; the definitions are not part of this excerpt.
struct AAICVTracker;
struct AAHeapToShared;
167 | ||||
168 | /// OpenMP specific information. For now, stores RFIs and ICVs also needed for | |||
169 | /// Attributor runs. | |||
170 | struct OMPInformationCache : public InformationCache { | |||
171 | OMPInformationCache(Module &M, AnalysisGetter &AG, | |||
172 | BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, | |||
173 | SmallPtrSetImpl<Kernel> &Kernels) | |||
174 | : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), | |||
175 | Kernels(Kernels) { | |||
176 | ||||
177 | OMPBuilder.initialize(); | |||
178 | initializeRuntimeFunctions(); | |||
179 | initializeInternalControlVars(); | |||
180 | } | |||
181 | ||||
182 | /// Generic information that describes an internal control variable. | |||
183 | struct InternalControlVarInfo { | |||
184 | /// The kind, as described by InternalControlVar enum. | |||
185 | InternalControlVar Kind; | |||
186 | ||||
187 | /// The name of the ICV. | |||
188 | StringRef Name; | |||
189 | ||||
190 | /// Environment variable associated with this ICV. | |||
191 | StringRef EnvVarName; | |||
192 | ||||
193 | /// Initial value kind. | |||
194 | ICVInitValue InitKind; | |||
195 | ||||
196 | /// Initial value. | |||
197 | ConstantInt *InitValue; | |||
198 | ||||
199 | /// Setter RTL function associated with this ICV. | |||
200 | RuntimeFunction Setter; | |||
201 | ||||
202 | /// Getter RTL function associated with this ICV. | |||
203 | RuntimeFunction Getter; | |||
204 | ||||
205 | /// RTL Function corresponding to the override clause of this ICV | |||
206 | RuntimeFunction Clause; | |||
207 | }; | |||
208 | ||||
209 | /// Generic information that describes a runtime function | |||
210 | struct RuntimeFunctionInfo { | |||
211 | ||||
212 | /// The kind, as described by the RuntimeFunction enum. | |||
213 | RuntimeFunction Kind; | |||
214 | ||||
215 | /// The name of the function. | |||
216 | StringRef Name; | |||
217 | ||||
218 | /// Flag to indicate a variadic function. | |||
219 | bool IsVarArg; | |||
220 | ||||
221 | /// The return type of the function. | |||
222 | Type *ReturnType; | |||
223 | ||||
224 | /// The argument types of the function. | |||
225 | SmallVector<Type *, 8> ArgumentTypes; | |||
226 | ||||
227 | /// The declaration if available. | |||
228 | Function *Declaration = nullptr; | |||
229 | ||||
230 | /// Uses of this runtime function per function containing the use. | |||
231 | using UseVector = SmallVector<Use *, 16>; | |||
232 | ||||
233 | /// Clear UsesMap for runtime function. | |||
234 | void clearUsesMap() { UsesMap.clear(); } | |||
235 | ||||
236 | /// Boolean conversion that is true if the runtime function was found. | |||
237 | operator bool() const { return Declaration; } | |||
238 | ||||
239 | /// Return the vector of uses in function \p F. | |||
240 | UseVector &getOrCreateUseVector(Function *F) { | |||
241 | std::shared_ptr<UseVector> &UV = UsesMap[F]; | |||
242 | if (!UV) | |||
243 | UV = std::make_shared<UseVector>(); | |||
244 | return *UV; | |||
245 | } | |||
246 | ||||
247 | /// Return the vector of uses in function \p F or `nullptr` if there are | |||
248 | /// none. | |||
249 | const UseVector *getUseVector(Function &F) const { | |||
250 | auto I = UsesMap.find(&F); | |||
251 | if (I != UsesMap.end()) | |||
252 | return I->second.get(); | |||
253 | return nullptr; | |||
254 | } | |||
255 | ||||
256 | /// Return how many functions contain uses of this runtime function. | |||
257 | size_t getNumFunctionsWithUses() const { return UsesMap.size(); } | |||
258 | ||||
259 | /// Return the number of arguments (or the minimal number for variadic | |||
260 | /// functions). | |||
261 | size_t getNumArgs() const { return ArgumentTypes.size(); } | |||
262 | ||||
263 | /// Run the callback \p CB on each use and forget the use if the result is | |||
264 | /// true. The callback will be fed the function in which the use was | |||
265 | /// encountered as second argument. | |||
266 | void foreachUse(SmallVectorImpl<Function *> &SCC, | |||
267 | function_ref<bool(Use &, Function &)> CB) { | |||
268 | for (Function *F : SCC) | |||
269 | foreachUse(CB, F); | |||
270 | } | |||
271 | ||||
272 | /// Run the callback \p CB on each use within the function \p F and forget | |||
273 | /// the use if the result is true. | |||
274 | void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { | |||
275 | SmallVector<unsigned, 8> ToBeDeleted; | |||
276 | ToBeDeleted.clear(); | |||
277 | ||||
278 | unsigned Idx = 0; | |||
279 | UseVector &UV = getOrCreateUseVector(F); | |||
280 | ||||
281 | for (Use *U : UV) { | |||
282 | if (CB(*U, *F)) | |||
283 | ToBeDeleted.push_back(Idx); | |||
284 | ++Idx; | |||
285 | } | |||
286 | ||||
287 | // Remove the to-be-deleted indices in reverse order as prior | |||
288 | // modifications will not modify the smaller indices. | |||
289 | while (!ToBeDeleted.empty()) { | |||
290 | unsigned Idx = ToBeDeleted.pop_back_val(); | |||
291 | UV[Idx] = UV.back(); | |||
292 | UV.pop_back(); | |||
293 | } | |||
294 | } | |||
295 | ||||
296 | private: | |||
297 | /// Map from functions to all uses of this runtime function contained in | |||
298 | /// them. | |||
299 | DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; | |||
300 | ||||
301 | public: | |||
302 | /// Iterators for the uses of this runtime function. | |||
303 | decltype(UsesMap)::iterator begin() { return UsesMap.begin(); } | |||
304 | decltype(UsesMap)::iterator end() { return UsesMap.end(); } | |||
305 | }; | |||
306 | ||||
307 | /// An OpenMP-IR-Builder instance | |||
308 | OpenMPIRBuilder OMPBuilder; | |||
309 | ||||
310 | /// Map from runtime function kind to the runtime function description. | |||
311 | EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, | |||
312 | RuntimeFunction::OMPRTL___last> | |||
313 | RFIs; | |||
314 | ||||
315 | /// Map from function declarations/definitions to their runtime enum type. | |||
316 | DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap; | |||
317 | ||||
318 | /// Map from ICV kind to the ICV description. | |||
319 | EnumeratedArray<InternalControlVarInfo, InternalControlVar, | |||
320 | InternalControlVar::ICV___last> | |||
321 | ICVs; | |||
322 | ||||
323 | /// Helper to initialize all internal control variable information for those | |||
324 | /// defined in OMPKinds.def. | |||
325 | void initializeInternalControlVars() { | |||
326 | #define ICV_RT_SET(_Name, RTL) \ | |||
327 | { \ | |||
328 | auto &ICV = ICVs[_Name]; \ | |||
329 | ICV.Setter = RTL; \ | |||
330 | } | |||
331 | #define ICV_RT_GET(Name, RTL) \ | |||
332 | { \ | |||
333 | auto &ICV = ICVs[Name]; \ | |||
334 | ICV.Getter = RTL; \ | |||
335 | } | |||
336 | #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ | |||
337 | { \ | |||
338 | auto &ICV = ICVs[Enum]; \ | |||
339 | ICV.Name = _Name; \ | |||
340 | ICV.Kind = Enum; \ | |||
341 | ICV.InitKind = Init; \ | |||
342 | ICV.EnvVarName = _EnvVarName; \ | |||
343 | switch (ICV.InitKind) { \ | |||
344 | case ICV_IMPLEMENTATION_DEFINED: \ | |||
345 | ICV.InitValue = nullptr; \ | |||
346 | break; \ | |||
347 | case ICV_ZERO: \ | |||
348 | ICV.InitValue = ConstantInt::get( \ | |||
349 | Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ | |||
350 | break; \ | |||
351 | case ICV_FALSE: \ | |||
352 | ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ | |||
353 | break; \ | |||
354 | case ICV_LAST: \ | |||
355 | break; \ | |||
356 | } \ | |||
357 | } | |||
358 | #include "llvm/Frontend/OpenMP/OMPKinds.def" | |||
359 | } | |||
360 | ||||
361 | /// Returns true if the function declaration \p F matches the runtime | |||
362 | /// function types, that is, return type \p RTFRetType, and argument types | |||
363 | /// \p RTFArgTypes. | |||
364 | static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, | |||
365 | SmallVector<Type *, 8> &RTFArgTypes) { | |||
366 | // TODO: We should output information to the user (under debug output | |||
367 | // and via remarks). | |||
368 | ||||
369 | if (!F) | |||
370 | return false; | |||
371 | if (F->getReturnType() != RTFRetType) | |||
372 | return false; | |||
373 | if (F->arg_size() != RTFArgTypes.size()) | |||
374 | return false; | |||
375 | ||||
376 | auto *RTFTyIt = RTFArgTypes.begin(); | |||
377 | for (Argument &Arg : F->args()) { | |||
378 | if (Arg.getType() != *RTFTyIt) | |||
379 | return false; | |||
380 | ||||
381 | ++RTFTyIt; | |||
382 | } | |||
383 | ||||
384 | return true; | |||
385 | } | |||
386 | ||||
387 | // Helper to collect all uses of the declaration in the UsesMap. | |||
388 | unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { | |||
389 | unsigned NumUses = 0; | |||
390 | if (!RFI.Declaration) | |||
391 | return NumUses; | |||
392 | OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); | |||
393 | ||||
394 | if (CollectStats) { | |||
395 | NumOpenMPRuntimeFunctionsIdentified += 1; | |||
396 | NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); | |||
397 | } | |||
398 | ||||
399 | // TODO: We directly convert uses into proper calls and unknown uses. | |||
400 | for (Use &U : RFI.Declaration->uses()) { | |||
401 | if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { | |||
402 | if (ModuleSlice.count(UserI->getFunction())) { | |||
403 | RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); | |||
404 | ++NumUses; | |||
405 | } | |||
406 | } else { | |||
407 | RFI.getOrCreateUseVector(nullptr).push_back(&U); | |||
408 | ++NumUses; | |||
409 | } | |||
410 | } | |||
411 | return NumUses; | |||
412 | } | |||
413 | ||||
414 | // Helper function to recollect uses of a runtime function. | |||
415 | void recollectUsesForFunction(RuntimeFunction RTF) { | |||
416 | auto &RFI = RFIs[RTF]; | |||
417 | RFI.clearUsesMap(); | |||
418 | collectUses(RFI, /*CollectStats*/ false); | |||
419 | } | |||
420 | ||||
421 | // Helper function to recollect uses of all runtime functions. | |||
422 | void recollectUses() { | |||
423 | for (int Idx = 0; Idx < RFIs.size(); ++Idx) | |||
424 | recollectUsesForFunction(static_cast<RuntimeFunction>(Idx)); | |||
425 | } | |||
426 | ||||
427 | /// Helper to initialize all runtime function information for those defined | |||
428 | /// in OpenMPKinds.def. | |||
429 | void initializeRuntimeFunctions() { | |||
430 | Module &M = *((*ModuleSlice.begin())->getParent()); | |||
431 | ||||
432 | // Helper macros for handling __VA_ARGS__ in OMP_RTL | |||
433 | #define OMP_TYPE(VarName, ...) \ | |||
434 | Type *VarName = OMPBuilder.VarName; \ | |||
435 | (void)VarName; | |||
436 | ||||
437 | #define OMP_ARRAY_TYPE(VarName, ...) \ | |||
438 | ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ | |||
439 | (void)VarName##Ty; \ | |||
440 | PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ | |||
441 | (void)VarName##PtrTy; | |||
442 | ||||
443 | #define OMP_FUNCTION_TYPE(VarName, ...) \ | |||
444 | FunctionType *VarName = OMPBuilder.VarName; \ | |||
445 | (void)VarName; \ | |||
446 | PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ | |||
447 | (void)VarName##Ptr; | |||
448 | ||||
449 | #define OMP_STRUCT_TYPE(VarName, ...) \ | |||
450 | StructType *VarName = OMPBuilder.VarName; \ | |||
451 | (void)VarName; \ | |||
452 | PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ | |||
453 | (void)VarName##Ptr; | |||
454 | ||||
455 | #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ | |||
456 | { \ | |||
457 | SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ | |||
458 | Function *F = M.getFunction(_Name); \ | |||
459 | RTLFunctions.insert(F); \ | |||
460 | if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ | |||
461 | RuntimeFunctionIDMap[F] = _Enum; \ | |||
462 | F->removeFnAttr(Attribute::NoInline); \ | |||
463 | auto &RFI = RFIs[_Enum]; \ | |||
464 | RFI.Kind = _Enum; \ | |||
465 | RFI.Name = _Name; \ | |||
466 | RFI.IsVarArg = _IsVarArg; \ | |||
467 | RFI.ReturnType = OMPBuilder._ReturnType; \ | |||
468 | RFI.ArgumentTypes = std::move(ArgsTypes); \ | |||
469 | RFI.Declaration = F; \ | |||
470 | unsigned NumUses = collectUses(RFI); \ | |||
471 | (void)NumUses; \ | |||
472 | LLVM_DEBUG({ \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | |||
473 | dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | |||
474 | << " found\n"; \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | |||
475 | if (RFI.Declaration) \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | |||
476 | dbgs() << TAG << "-> got " << NumUses << " uses in " \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | |||
477 | << RFI.getNumFunctionsWithUses() \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | |||
478 | << " different functions.\n"; \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false) | |||
479 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") << " found\n"; if (RFI .Declaration) dbgs() << TAG << "-> got " << NumUses << " uses in " << RFI.getNumFunctionsWithUses () << " different functions.\n"; }; } } while (false); \ | |||
480 | } \ | |||
481 | } | |||
482 | #include "llvm/Frontend/OpenMP/OMPKinds.def" | |||
483 | ||||
484 | // TODO: We should attach the attributes defined in OMPKinds.def. | |||
485 | } | |||
486 | ||||
487 | /// Collection of known kernels (\see Kernel) in the module. | |||
488 | SmallPtrSetImpl<Kernel> &Kernels; | |||
489 | ||||
490 | /// Collection of known OpenMP runtime functions.. | |||
491 | DenseSet<const Function *> RTLFunctions; | |||
492 | }; | |||
493 | ||||
494 | template <typename Ty, bool InsertInvalidates = true> | |||
495 | struct BooleanStateWithSetVector : public BooleanState { | |||
496 | bool contains(const Ty &Elem) const { return Set.contains(Elem); } | |||
497 | bool insert(const Ty &Elem) { | |||
498 | if (InsertInvalidates) | |||
499 | BooleanState::indicatePessimisticFixpoint(); | |||
500 | return Set.insert(Elem); | |||
501 | } | |||
502 | ||||
503 | const Ty &operator[](int Idx) const { return Set[Idx]; } | |||
504 | bool operator==(const BooleanStateWithSetVector &RHS) const { | |||
505 | return BooleanState::operator==(RHS) && Set == RHS.Set; | |||
506 | } | |||
507 | bool operator!=(const BooleanStateWithSetVector &RHS) const { | |||
508 | return !(*this == RHS); | |||
509 | } | |||
510 | ||||
511 | bool empty() const { return Set.empty(); } | |||
512 | size_t size() const { return Set.size(); } | |||
513 | ||||
514 | /// "Clamp" this state with \p RHS. | |||
515 | BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) { | |||
516 | BooleanState::operator^=(RHS); | |||
517 | Set.insert(RHS.Set.begin(), RHS.Set.end()); | |||
518 | return *this; | |||
519 | } | |||
520 | ||||
521 | private: | |||
522 | /// A set to keep track of elements. | |||
523 | SetVector<Ty> Set; | |||
524 | ||||
525 | public: | |||
526 | typename decltype(Set)::iterator begin() { return Set.begin(); } | |||
527 | typename decltype(Set)::iterator end() { return Set.end(); } | |||
528 | typename decltype(Set)::const_iterator begin() const { return Set.begin(); } | |||
529 | typename decltype(Set)::const_iterator end() const { return Set.end(); } | |||
530 | }; | |||
531 | ||||
532 | template <typename Ty, bool InsertInvalidates = true> | |||
533 | using BooleanStateWithPtrSetVector = | |||
534 | BooleanStateWithSetVector<Ty *, InsertInvalidates>; | |||
535 | ||||
536 | struct KernelInfoState : AbstractState { | |||
537 | /// Flag to track if we reached a fixpoint. | |||
538 | bool IsAtFixpoint = false; | |||
539 | ||||
540 | /// The parallel regions (identified by the outlined parallel functions) that | |||
541 | /// can be reached from the associated function. | |||
542 | BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false> | |||
543 | ReachedKnownParallelRegions; | |||
544 | ||||
545 | /// State to track what parallel region we might reach. | |||
546 | BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions; | |||
547 | ||||
548 | /// State to track if we are in SPMD-mode, assumed or know, and why we decided | |||
549 | /// we cannot be. If it is assumed, then RequiresFullRuntime should also be | |||
550 | /// false. | |||
551 | BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker; | |||
552 | ||||
553 | /// The __kmpc_target_init call in this kernel, if any. If we find more than | |||
554 | /// one we abort as the kernel is malformed. | |||
555 | CallBase *KernelInitCB = nullptr; | |||
556 | ||||
557 | /// The __kmpc_target_deinit call in this kernel, if any. If we find more than | |||
558 | /// one we abort as the kernel is malformed. | |||
559 | CallBase *KernelDeinitCB = nullptr; | |||
560 | ||||
561 | /// Flag to indicate if the associated function is a kernel entry. | |||
562 | bool IsKernelEntry = false; | |||
563 | ||||
564 | /// State to track what kernel entries can reach the associated function. | |||
565 | BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries; | |||
566 | ||||
567 | /// State to indicate if we can track parallel level of the associated | |||
568 | /// function. We will give up tracking if we encounter unknown caller or the | |||
569 | /// caller is __kmpc_parallel_51. | |||
570 | BooleanStateWithSetVector<uint8_t> ParallelLevels; | |||
571 | ||||
572 | /// Abstract State interface | |||
573 | ///{ | |||
574 | ||||
575 | KernelInfoState() {} | |||
576 | KernelInfoState(bool BestState) { | |||
577 | if (!BestState) | |||
578 | indicatePessimisticFixpoint(); | |||
579 | } | |||
580 | ||||
581 | /// See AbstractState::isValidState(...) | |||
582 | bool isValidState() const override { return true; } | |||
583 | ||||
584 | /// See AbstractState::isAtFixpoint(...) | |||
585 | bool isAtFixpoint() const override { return IsAtFixpoint; } | |||
586 | ||||
587 | /// See AbstractState::indicatePessimisticFixpoint(...) | |||
588 | ChangeStatus indicatePessimisticFixpoint() override { | |||
589 | IsAtFixpoint = true; | |||
590 | ReachingKernelEntries.indicatePessimisticFixpoint(); | |||
591 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
592 | ReachedKnownParallelRegions.indicatePessimisticFixpoint(); | |||
593 | ReachedUnknownParallelRegions.indicatePessimisticFixpoint(); | |||
594 | return ChangeStatus::CHANGED; | |||
595 | } | |||
596 | ||||
597 | /// See AbstractState::indicateOptimisticFixpoint(...) | |||
598 | ChangeStatus indicateOptimisticFixpoint() override { | |||
599 | IsAtFixpoint = true; | |||
600 | ReachingKernelEntries.indicateOptimisticFixpoint(); | |||
601 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | |||
602 | ReachedKnownParallelRegions.indicateOptimisticFixpoint(); | |||
603 | ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); | |||
604 | return ChangeStatus::UNCHANGED; | |||
605 | } | |||
606 | ||||
607 | /// Return the assumed state | |||
608 | KernelInfoState &getAssumed() { return *this; } | |||
609 | const KernelInfoState &getAssumed() const { return *this; } | |||
610 | ||||
611 | bool operator==(const KernelInfoState &RHS) const { | |||
612 | if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker) | |||
613 | return false; | |||
614 | if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions) | |||
615 | return false; | |||
616 | if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions) | |||
617 | return false; | |||
618 | if (ReachingKernelEntries != RHS.ReachingKernelEntries) | |||
619 | return false; | |||
620 | return true; | |||
621 | } | |||
622 | ||||
623 | /// Returns true if this kernel contains any OpenMP parallel regions. | |||
624 | bool mayContainParallelRegion() { | |||
625 | return !ReachedKnownParallelRegions.empty() || | |||
626 | !ReachedUnknownParallelRegions.empty(); | |||
627 | } | |||
628 | ||||
629 | /// Return empty set as the best state of potential values. | |||
630 | static KernelInfoState getBestState() { return KernelInfoState(true); } | |||
631 | ||||
632 | static KernelInfoState getBestState(KernelInfoState &KIS) { | |||
633 | return getBestState(); | |||
634 | } | |||
635 | ||||
636 | /// Return full set as the worst state of potential values. | |||
637 | static KernelInfoState getWorstState() { return KernelInfoState(false); } | |||
638 | ||||
639 | /// "Clamp" this state with \p KIS. | |||
640 | KernelInfoState operator^=(const KernelInfoState &KIS) { | |||
641 | // Do not merge two different _init and _deinit call sites. | |||
642 | if (KIS.KernelInitCB) { | |||
643 | if (KernelInitCB && KernelInitCB != KIS.KernelInitCB) | |||
644 | llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 645) | |||
645 | "assumptions.")::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 645); | |||
646 | KernelInitCB = KIS.KernelInitCB; | |||
647 | } | |||
648 | if (KIS.KernelDeinitCB) { | |||
649 | if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB) | |||
650 | llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 651) | |||
651 | "assumptions.")::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt " "assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 651); | |||
652 | KernelDeinitCB = KIS.KernelDeinitCB; | |||
653 | } | |||
654 | SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; | |||
655 | ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; | |||
656 | ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; | |||
657 | return *this; | |||
658 | } | |||
659 | ||||
660 | KernelInfoState operator&=(const KernelInfoState &KIS) { | |||
661 | return (*this ^= KIS); | |||
662 | } | |||
663 | ||||
664 | ///} | |||
665 | }; | |||
666 | ||||
667 | /// Used to map the values physically (in the IR) stored in an offload | |||
668 | /// array, to a vector in memory. | |||
669 | struct OffloadArray { | |||
670 | /// Physical array (in the IR). | |||
671 | AllocaInst *Array = nullptr; | |||
672 | /// Mapped values. | |||
673 | SmallVector<Value *, 8> StoredValues; | |||
674 | /// Last stores made in the offload array. | |||
675 | SmallVector<StoreInst *, 8> LastAccesses; | |||
676 | ||||
677 | OffloadArray() = default; | |||
678 | ||||
679 | /// Initializes the OffloadArray with the values stored in \p Array before | |||
680 | /// instruction \p Before is reached. Returns false if the initialization | |||
681 | /// fails. | |||
682 | /// This MUST be used immediately after the construction of the object. | |||
683 | bool initialize(AllocaInst &Array, Instruction &Before) { | |||
684 | if (!Array.getAllocatedType()->isArrayTy()) | |||
685 | return false; | |||
686 | ||||
687 | if (!getValues(Array, Before)) | |||
688 | return false; | |||
689 | ||||
690 | this->Array = &Array; | |||
691 | return true; | |||
692 | } | |||
693 | ||||
694 | static const unsigned DeviceIDArgNum = 1; | |||
695 | static const unsigned BasePtrsArgNum = 3; | |||
696 | static const unsigned PtrsArgNum = 4; | |||
697 | static const unsigned SizesArgNum = 5; | |||
698 | ||||
699 | private: | |||
700 | /// Traverses the BasicBlock where \p Array is, collecting the stores made to | |||
701 | /// \p Array, leaving StoredValues with the values stored before the | |||
702 | /// instruction \p Before is reached. | |||
703 | bool getValues(AllocaInst &Array, Instruction &Before) { | |||
704 | // Initialize container. | |||
705 | const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements(); | |||
706 | StoredValues.assign(NumValues, nullptr); | |||
707 | LastAccesses.assign(NumValues, nullptr); | |||
708 | ||||
709 | // TODO: This assumes the instruction \p Before is in the same | |||
710 | // BasicBlock as Array. Make it general, for any control flow graph. | |||
711 | BasicBlock *BB = Array.getParent(); | |||
712 | if (BB != Before.getParent()) | |||
713 | return false; | |||
714 | ||||
715 | const DataLayout &DL = Array.getModule()->getDataLayout(); | |||
716 | const unsigned int PointerSize = DL.getPointerSize(); | |||
717 | ||||
718 | for (Instruction &I : *BB) { | |||
719 | if (&I == &Before) | |||
720 | break; | |||
721 | ||||
722 | if (!isa<StoreInst>(&I)) | |||
723 | continue; | |||
724 | ||||
725 | auto *S = cast<StoreInst>(&I); | |||
726 | int64_t Offset = -1; | |||
727 | auto *Dst = | |||
728 | GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL); | |||
729 | if (Dst == &Array) { | |||
730 | int64_t Idx = Offset / PointerSize; | |||
731 | StoredValues[Idx] = getUnderlyingObject(S->getValueOperand()); | |||
732 | LastAccesses[Idx] = S; | |||
733 | } | |||
734 | } | |||
735 | ||||
736 | return isFilled(); | |||
737 | } | |||
738 | ||||
739 | /// Returns true if all values in StoredValues and | |||
740 | /// LastAccesses are not nullptrs. | |||
741 | bool isFilled() { | |||
742 | const unsigned NumValues = StoredValues.size(); | |||
743 | for (unsigned I = 0; I < NumValues; ++I) { | |||
744 | if (!StoredValues[I] || !LastAccesses[I]) | |||
745 | return false; | |||
746 | } | |||
747 | ||||
748 | return true; | |||
749 | } | |||
750 | }; | |||
751 | ||||
752 | struct OpenMPOpt { | |||
753 | ||||
754 | using OptimizationRemarkGetter = | |||
755 | function_ref<OptimizationRemarkEmitter &(Function *)>; | |||
756 | ||||
757 | OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, | |||
758 | OptimizationRemarkGetter OREGetter, | |||
759 | OMPInformationCache &OMPInfoCache, Attributor &A) | |||
760 | : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), | |||
761 | OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} | |||
762 | ||||
763 | /// Check if any remarks are enabled for openmp-opt | |||
764 | bool remarksEnabled() { | |||
765 | auto &Ctx = M.getContext(); | |||
766 | return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE"openmp-opt"); | |||
767 | } | |||
768 | ||||
769 | /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. | |||
770 | bool run(bool IsModulePass) { | |||
771 | if (SCC.empty()) | |||
772 | return false; | |||
773 | ||||
774 | bool Changed = false; | |||
775 | ||||
776 | LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"; } } while (false) | |||
777 | << " functions in a slice with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"; } } while (false) | |||
778 | << OMPInfoCache.ModuleSlice.size() << " functions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"; } } while (false); | |||
779 | ||||
780 | if (IsModulePass) { | |||
781 | Changed |= runAttributor(IsModulePass); | |||
782 | ||||
783 | // Recollect uses, in case Attributor deleted any. | |||
784 | OMPInfoCache.recollectUses(); | |||
785 | ||||
786 | // TODO: This should be folded into buildCustomStateMachine. | |||
787 | Changed |= rewriteDeviceCodeStateMachine(); | |||
788 | ||||
789 | if (remarksEnabled()) | |||
790 | analysisGlobalization(); | |||
791 | } else { | |||
792 | if (PrintICVValues) | |||
793 | printICVs(); | |||
794 | if (PrintOpenMPKernels) | |||
795 | printKernels(); | |||
796 | ||||
797 | Changed |= runAttributor(IsModulePass); | |||
798 | ||||
799 | // Recollect uses, in case Attributor deleted any. | |||
800 | OMPInfoCache.recollectUses(); | |||
801 | ||||
802 | Changed |= deleteParallelRegions(); | |||
803 | ||||
804 | if (HideMemoryTransferLatency) | |||
805 | Changed |= hideMemTransfersLatency(); | |||
806 | Changed |= deduplicateRuntimeCalls(); | |||
807 | if (EnableParallelRegionMerging) { | |||
808 | if (mergeParallelRegions()) { | |||
809 | deduplicateRuntimeCalls(); | |||
810 | Changed = true; | |||
811 | } | |||
812 | } | |||
813 | } | |||
814 | ||||
815 | return Changed; | |||
816 | } | |||
817 | ||||
818 | /// Print initial ICV values for testing. | |||
819 | /// FIXME: This should be done from the Attributor once it is added. | |||
820 | void printICVs() const { | |||
821 | InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, | |||
822 | ICV_proc_bind}; | |||
823 | ||||
824 | for (Function *F : OMPInfoCache.ModuleSlice) { | |||
825 | for (auto ICV : ICVs) { | |||
826 | auto ICVInfo = OMPInfoCache.ICVs[ICV]; | |||
827 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
828 | return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) | |||
829 | << " Value: " | |||
830 | << (ICVInfo.InitValue | |||
831 | ? toString(ICVInfo.InitValue->getValue(), 10, true) | |||
832 | : "IMPLEMENTATION_DEFINED"); | |||
833 | }; | |||
834 | ||||
835 | emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark); | |||
836 | } | |||
837 | } | |||
838 | } | |||
839 | ||||
840 | /// Print OpenMP GPU kernels for testing. | |||
841 | void printKernels() const { | |||
842 | for (Function *F : SCC) { | |||
843 | if (!OMPInfoCache.Kernels.count(F)) | |||
844 | continue; | |||
845 | ||||
846 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
847 | return ORA << "OpenMP GPU kernel " | |||
848 | << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; | |||
849 | }; | |||
850 | ||||
851 | emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark); | |||
852 | } | |||
853 | } | |||
854 | ||||
855 | /// Return the call if \p U is a callee use in a regular call. If \p RFI is | |||
856 | /// given it has to be the callee or a nullptr is returned. | |||
857 | static CallInst *getCallIfRegularCall( | |||
858 | Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { | |||
859 | CallInst *CI = dyn_cast<CallInst>(U.getUser()); | |||
860 | if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && | |||
861 | (!RFI || | |||
862 | (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) | |||
863 | return CI; | |||
864 | return nullptr; | |||
865 | } | |||
866 | ||||
867 | /// Return the call if \p V is a regular call. If \p RFI is given it has to be | |||
868 | /// the callee or a nullptr is returned. | |||
869 | static CallInst *getCallIfRegularCall( | |||
870 | Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { | |||
871 | CallInst *CI = dyn_cast<CallInst>(&V); | |||
872 | if (CI && !CI->hasOperandBundles() && | |||
873 | (!RFI || | |||
874 | (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) | |||
875 | return CI; | |||
876 | return nullptr; | |||
877 | } | |||
878 | ||||
879 | private: | |||
880 | /// Merge parallel regions when it is safe. | |||
881 | bool mergeParallelRegions() { | |||
882 | const unsigned CallbackCalleeOperand = 2; | |||
883 | const unsigned CallbackFirstArgOperand = 3; | |||
884 | using InsertPointTy = OpenMPIRBuilder::InsertPointTy; | |||
885 | ||||
886 | // Check if there are any __kmpc_fork_call calls to merge. | |||
887 | OMPInformationCache::RuntimeFunctionInfo &RFI = | |||
888 | OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; | |||
889 | ||||
890 | if (!RFI.Declaration) | |||
891 | return false; | |||
892 | ||||
893 | // Unmergable calls that prevent merging a parallel region. | |||
894 | OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = { | |||
895 | OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind], | |||
896 | OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads], | |||
897 | }; | |||
898 | ||||
899 | bool Changed = false; | |||
900 | LoopInfo *LI = nullptr; | |||
901 | DominatorTree *DT = nullptr; | |||
902 | ||||
903 | SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; | |||
904 | ||||
905 | BasicBlock *StartBB = nullptr, *EndBB = nullptr; | |||
906 | auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | |||
907 | BasicBlock &ContinuationIP) { | |||
908 | BasicBlock *CGStartBB = CodeGenIP.getBlock(); | |||
909 | BasicBlock *CGEndBB = | |||
910 | SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); | |||
911 | assert(StartBB != nullptr && "StartBB should not be null")(static_cast <bool> (StartBB != nullptr && "StartBB should not be null" ) ? void (0) : __assert_fail ("StartBB != nullptr && \"StartBB should not be null\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 911, __extension__ __PRETTY_FUNCTION__)); | |||
912 | CGStartBB->getTerminator()->setSuccessor(0, StartBB); | |||
913 | assert(EndBB != nullptr && "EndBB should not be null")(static_cast <bool> (EndBB != nullptr && "EndBB should not be null" ) ? void (0) : __assert_fail ("EndBB != nullptr && \"EndBB should not be null\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 913, __extension__ __PRETTY_FUNCTION__)); | |||
914 | EndBB->getTerminator()->setSuccessor(0, CGEndBB); | |||
915 | }; | |||
916 | ||||
917 | auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, | |||
918 | Value &Inner, Value *&ReplacementValue) -> InsertPointTy { | |||
919 | ReplacementValue = &Inner; | |||
920 | return CodeGenIP; | |||
921 | }; | |||
922 | ||||
923 | auto FiniCB = [&](InsertPointTy CodeGenIP) {}; | |||
924 | ||||
925 | /// Create a sequential execution region within a merged parallel region, | |||
926 | /// encapsulated in a master construct with a barrier for synchronization. | |||
927 | auto CreateSequentialRegion = [&](Function *OuterFn, | |||
928 | BasicBlock *OuterPredBB, | |||
929 | Instruction *SeqStartI, | |||
930 | Instruction *SeqEndI) { | |||
931 | // Isolate the instructions of the sequential region to a separate | |||
932 | // block. | |||
933 | BasicBlock *ParentBB = SeqStartI->getParent(); | |||
934 | BasicBlock *SeqEndBB = | |||
935 | SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); | |||
936 | BasicBlock *SeqAfterBB = | |||
937 | SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); | |||
938 | BasicBlock *SeqStartBB = | |||
939 | SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); | |||
940 | ||||
941 | assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&(static_cast <bool> (ParentBB->getUniqueSuccessor() == SeqStartBB && "Expected a different CFG") ? void (0) : __assert_fail ("ParentBB->getUniqueSuccessor() == SeqStartBB && \"Expected a different CFG\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 942, __extension__ __PRETTY_FUNCTION__)) | |||
942 | "Expected a different CFG")(static_cast <bool> (ParentBB->getUniqueSuccessor() == SeqStartBB && "Expected a different CFG") ? void (0) : __assert_fail ("ParentBB->getUniqueSuccessor() == SeqStartBB && \"Expected a different CFG\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 942, __extension__ __PRETTY_FUNCTION__)); | |||
943 | const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); | |||
944 | ParentBB->getTerminator()->eraseFromParent(); | |||
945 | ||||
946 | auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | |||
947 | BasicBlock &ContinuationIP) { | |||
948 | BasicBlock *CGStartBB = CodeGenIP.getBlock(); | |||
949 | BasicBlock *CGEndBB = | |||
950 | SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); | |||
951 | assert(SeqStartBB != nullptr && "SeqStartBB should not be null")(static_cast <bool> (SeqStartBB != nullptr && "SeqStartBB should not be null" ) ? void (0) : __assert_fail ("SeqStartBB != nullptr && \"SeqStartBB should not be null\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 951, __extension__ __PRETTY_FUNCTION__)); | |||
952 | CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); | |||
953 | assert(SeqEndBB != nullptr && "SeqEndBB should not be null")(static_cast <bool> (SeqEndBB != nullptr && "SeqEndBB should not be null" ) ? void (0) : __assert_fail ("SeqEndBB != nullptr && \"SeqEndBB should not be null\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 953, __extension__ __PRETTY_FUNCTION__)); | |||
954 | SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); | |||
955 | }; | |||
956 | auto FiniCB = [&](InsertPointTy CodeGenIP) {}; | |||
957 | ||||
958 | // Find outputs from the sequential region to outside users and | |||
959 | // broadcast their values to them. | |||
960 | for (Instruction &I : *SeqStartBB) { | |||
961 | SmallPtrSet<Instruction *, 4> OutsideUsers; | |||
962 | for (User *Usr : I.users()) { | |||
963 | Instruction &UsrI = *cast<Instruction>(Usr); | |||
964 | // Ignore outputs to LT intrinsics, code extraction for the merged | |||
965 | // parallel region will fix them. | |||
966 | if (UsrI.isLifetimeStartOrEnd()) | |||
967 | continue; | |||
968 | ||||
969 | if (UsrI.getParent() != SeqStartBB) | |||
970 | OutsideUsers.insert(&UsrI); | |||
971 | } | |||
972 | ||||
973 | if (OutsideUsers.empty()) | |||
974 | continue; | |||
975 | ||||
976 | // Emit an alloca in the outer region to store the broadcasted | |||
977 | // value. | |||
978 | const DataLayout &DL = M.getDataLayout(); | |||
979 | AllocaInst *AllocaI = new AllocaInst( | |||
980 | I.getType(), DL.getAllocaAddrSpace(), nullptr, | |||
981 | I.getName() + ".seq.output.alloc", &OuterFn->front().front()); | |||
982 | ||||
983 | // Emit a store instruction in the sequential BB to update the | |||
984 | // value. | |||
985 | new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()); | |||
986 | ||||
987 | // Emit a load instruction and replace the use of the output value | |||
988 | // with it. | |||
989 | for (Instruction *UsrI : OutsideUsers) { | |||
990 | LoadInst *LoadI = new LoadInst( | |||
991 | I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI); | |||
992 | UsrI->replaceUsesOfWith(&I, LoadI); | |||
993 | } | |||
994 | } | |||
995 | ||||
996 | OpenMPIRBuilder::LocationDescription Loc( | |||
997 | InsertPointTy(ParentBB, ParentBB->end()), DL); | |||
998 | InsertPointTy SeqAfterIP = | |||
999 | OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); | |||
1000 | ||||
1001 | OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); | |||
1002 | ||||
1003 | BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); | |||
1004 | ||||
1005 | LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "After sequential inlining " << *OuterFn << "\n"; } } while (false) | |||
1006 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "After sequential inlining " << *OuterFn << "\n"; } } while (false); | |||
1007 | }; | |||
1008 | ||||
1009 | // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all | |||
1010 | // contained in BB and only separated by instructions that can be | |||
1011 | // redundantly executed in parallel. The block BB is split before the first | |||
1012 | // call (in MergableCIs) and after the last so the entire region we merge | |||
1013 | // into a single parallel region is contained in a single basic block | |||
1014 | // without any other instructions. We use the OpenMPIRBuilder to outline | |||
1015 | // that block and call the resulting function via __kmpc_fork_call. | |||
1016 | auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) { | |||
1017 | // TODO: Change the interface to allow single CIs expanded, e.g, to | |||
1018 | // include an outer loop. | |||
1019 | assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs")(static_cast <bool> (MergableCIs.size() > 1 && "Assumed multiple mergable CIs") ? void (0) : __assert_fail ( "MergableCIs.size() > 1 && \"Assumed multiple mergable CIs\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1019, __extension__ __PRETTY_FUNCTION__)); | |||
1020 | ||||
1021 | auto Remark = [&](OptimizationRemark OR) { | |||
1022 | OR << "Parallel region merged with parallel region" | |||
1023 | << (MergableCIs.size() > 2 ? "s" : "") << " at "; | |||
1024 | for (auto *CI : llvm::drop_begin(MergableCIs)) { | |||
1025 | OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); | |||
1026 | if (CI != MergableCIs.back()) | |||
1027 | OR << ", "; | |||
1028 | } | |||
1029 | return OR << "."; | |||
1030 | }; | |||
1031 | ||||
1032 | emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark); | |||
1033 | ||||
1034 | Function *OriginalFn = BB->getParent(); | |||
1035 | LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Merge " << MergableCIs.size() << " parallel regions in " << OriginalFn->getName() << "\n"; } } while (false) | |||
1036 | << " parallel regions in " << OriginalFn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Merge " << MergableCIs.size() << " parallel regions in " << OriginalFn->getName() << "\n"; } } while (false) | |||
1037 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Merge " << MergableCIs.size() << " parallel regions in " << OriginalFn->getName() << "\n"; } } while (false); | |||
1038 | ||||
1039 | // Isolate the calls to merge in a separate block. | |||
1040 | EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI); | |||
1041 | BasicBlock *AfterBB = | |||
1042 | SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI); | |||
1043 | StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr, | |||
1044 | "omp.par.merged"); | |||
1045 | ||||
1046 | assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG")(static_cast <bool> (BB->getUniqueSuccessor() == StartBB && "Expected a different CFG") ? void (0) : __assert_fail ("BB->getUniqueSuccessor() == StartBB && \"Expected a different CFG\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1046, __extension__ __PRETTY_FUNCTION__)); | |||
1047 | const DebugLoc DL = BB->getTerminator()->getDebugLoc(); | |||
1048 | BB->getTerminator()->eraseFromParent(); | |||
1049 | ||||
1050 | // Create sequential regions for sequential instructions that are | |||
1051 | // in-between mergable parallel regions. | |||
1052 | for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1; | |||
1053 | It != End; ++It) { | |||
1054 | Instruction *ForkCI = *It; | |||
1055 | Instruction *NextForkCI = *(It + 1); | |||
1056 | ||||
1057 | // Continue if there are not in-between instructions. | |||
1058 | if (ForkCI->getNextNode() == NextForkCI) | |||
1059 | continue; | |||
1060 | ||||
1061 | CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(), | |||
1062 | NextForkCI->getPrevNode()); | |||
1063 | } | |||
1064 | ||||
1065 | OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), | |||
1066 | DL); | |||
1067 | IRBuilder<>::InsertPoint AllocaIP( | |||
1068 | &OriginalFn->getEntryBlock(), | |||
1069 | OriginalFn->getEntryBlock().getFirstInsertionPt()); | |||
1070 | // Create the merged parallel region with default proc binding, to | |||
1071 | // avoid overriding binding settings, and without explicit cancellation. | |||
1072 | InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( | |||
1073 | Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, | |||
1074 | OMP_PROC_BIND_default, /* IsCancellable */ false); | |||
1075 | BranchInst::Create(AfterBB, AfterIP.getBlock()); | |||
1076 | ||||
1077 | // Perform the actual outlining. | |||
1078 | OMPInfoCache.OMPBuilder.finalize(OriginalFn, | |||
1079 | /* AllowExtractorSinking */ true); | |||
1080 | ||||
1081 | Function *OutlinedFn = MergableCIs.front()->getCaller(); | |||
1082 | ||||
1083 | // Replace the __kmpc_fork_call calls with direct calls to the outlined | |||
1084 | // callbacks. | |||
1085 | SmallVector<Value *, 8> Args; | |||
1086 | for (auto *CI : MergableCIs) { | |||
1087 | Value *Callee = | |||
1088 | CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); | |||
1089 | FunctionType *FT = | |||
1090 | cast<FunctionType>(Callee->getType()->getPointerElementType()); | |||
1091 | Args.clear(); | |||
1092 | Args.push_back(OutlinedFn->getArg(0)); | |||
1093 | Args.push_back(OutlinedFn->getArg(1)); | |||
1094 | for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; | |||
1095 | ++U) | |||
1096 | Args.push_back(CI->getArgOperand(U)); | |||
1097 | ||||
1098 | CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI); | |||
1099 | if (CI->getDebugLoc()) | |||
1100 | NewCI->setDebugLoc(CI->getDebugLoc()); | |||
1101 | ||||
1102 | // Forward parameter attributes from the callback to the callee. | |||
1103 | for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; | |||
1104 | ++U) | |||
1105 | for (const Attribute &A : CI->getAttributes().getParamAttrs(U)) | |||
1106 | NewCI->addParamAttr( | |||
1107 | U - (CallbackFirstArgOperand - CallbackCalleeOperand), A); | |||
1108 | ||||
1109 | // Emit an explicit barrier to replace the implicit fork-join barrier. | |||
1110 | if (CI != MergableCIs.back()) { | |||
1111 | // TODO: Remove barrier if the merged parallel region includes the | |||
1112 | // 'nowait' clause. | |||
1113 | OMPInfoCache.OMPBuilder.createBarrier( | |||
1114 | InsertPointTy(NewCI->getParent(), | |||
1115 | NewCI->getNextNode()->getIterator()), | |||
1116 | OMPD_parallel); | |||
1117 | } | |||
1118 | ||||
1119 | CI->eraseFromParent(); | |||
1120 | } | |||
1121 | ||||
1122 | assert(OutlinedFn != OriginalFn && "Outlining failed")(static_cast <bool> (OutlinedFn != OriginalFn && "Outlining failed") ? void (0) : __assert_fail ("OutlinedFn != OriginalFn && \"Outlining failed\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1122, __extension__ __PRETTY_FUNCTION__)); | |||
1123 | CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn); | |||
1124 | CGUpdater.reanalyzeFunction(*OriginalFn); | |||
1125 | ||||
1126 | NumOpenMPParallelRegionsMerged += MergableCIs.size(); | |||
1127 | ||||
1128 | return true; | |||
1129 | }; | |||
1130 | ||||
1131 | // Helper function that identifes sequences of | |||
1132 | // __kmpc_fork_call uses in a basic block. | |||
1133 | auto DetectPRsCB = [&](Use &U, Function &F) { | |||
1134 | CallInst *CI = getCallIfRegularCall(U, &RFI); | |||
1135 | BB2PRMap[CI->getParent()].insert(CI); | |||
1136 | ||||
1137 | return false; | |||
1138 | }; | |||
1139 | ||||
1140 | BB2PRMap.clear(); | |||
1141 | RFI.foreachUse(SCC, DetectPRsCB); | |||
1142 | SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector; | |||
1143 | // Find mergable parallel regions within a basic block that are | |||
1144 | // safe to merge, that is any in-between instructions can safely | |||
1145 | // execute in parallel after merging. | |||
1146 | // TODO: support merging across basic-blocks. | |||
1147 | for (auto &It : BB2PRMap) { | |||
1148 | auto &CIs = It.getSecond(); | |||
1149 | if (CIs.size() < 2) | |||
1150 | continue; | |||
1151 | ||||
1152 | BasicBlock *BB = It.getFirst(); | |||
1153 | SmallVector<CallInst *, 4> MergableCIs; | |||
1154 | ||||
1155 | /// Returns true if the instruction is mergable, false otherwise. | |||
1156 | /// A terminator instruction is unmergable by definition since merging | |||
1157 | /// works within a BB. Instructions before the mergable region are | |||
1158 | /// mergable if they are not calls to OpenMP runtime functions that may | |||
1159 | /// set different execution parameters for subsequent parallel regions. | |||
1160 | /// Instructions in-between parallel regions are mergable if they are not | |||
1161 | /// calls to any non-intrinsic function since that may call a non-mergable | |||
1162 | /// OpenMP runtime function. | |||
1163 | auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { | |||
1164 | // We do not merge across BBs, hence return false (unmergable) if the | |||
1165 | // instruction is a terminator. | |||
1166 | if (I.isTerminator()) | |||
1167 | return false; | |||
1168 | ||||
1169 | if (!isa<CallInst>(&I)) | |||
1170 | return true; | |||
1171 | ||||
1172 | CallInst *CI = cast<CallInst>(&I); | |||
1173 | if (IsBeforeMergableRegion) { | |||
1174 | Function *CalledFunction = CI->getCalledFunction(); | |||
1175 | if (!CalledFunction) | |||
1176 | return false; | |||
1177 | // Return false (unmergable) if the call before the parallel | |||
1178 | // region calls an explicit affinity (proc_bind) or number of | |||
1179 | // threads (num_threads) compiler-generated function. Those settings | |||
1180 | // may be incompatible with following parallel regions. | |||
1181 | // TODO: ICV tracking to detect compatibility. | |||
1182 | for (const auto &RFI : UnmergableCallsInfo) { | |||
1183 | if (CalledFunction == RFI.Declaration) | |||
1184 | return false; | |||
1185 | } | |||
1186 | } else { | |||
1187 | // Return false (unmergable) if there is a call instruction | |||
1188 | // in-between parallel regions when it is not an intrinsic. It | |||
1189 | // may call an unmergable OpenMP runtime function in its callpath. | |||
1190 | // TODO: Keep track of possible OpenMP calls in the callpath. | |||
1191 | if (!isa<IntrinsicInst>(CI)) | |||
1192 | return false; | |||
1193 | } | |||
1194 | ||||
1195 | return true; | |||
1196 | }; | |||
1197 | // Find maximal number of parallel region CIs that are safe to merge. | |||
1198 | for (auto It = BB->begin(), End = BB->end(); It != End;) { | |||
1199 | Instruction &I = *It; | |||
1200 | ++It; | |||
1201 | ||||
1202 | if (CIs.count(&I)) { | |||
1203 | MergableCIs.push_back(cast<CallInst>(&I)); | |||
1204 | continue; | |||
1205 | } | |||
1206 | ||||
1207 | // Continue expanding if the instruction is mergable. | |||
1208 | if (IsMergable(I, MergableCIs.empty())) | |||
1209 | continue; | |||
1210 | ||||
1211 | // Forward the instruction iterator to skip the next parallel region | |||
1212 | // since there is an unmergable instruction which can affect it. | |||
1213 | for (; It != End; ++It) { | |||
1214 | Instruction &SkipI = *It; | |||
1215 | if (CIs.count(&SkipI)) { | |||
1216 | LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Skip parallel region " << SkipI << " due to " << I << "\n"; } } while (false) | |||
1217 | << " due to " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Skip parallel region " << SkipI << " due to " << I << "\n"; } } while (false); | |||
1218 | ++It; | |||
1219 | break; | |||
1220 | } | |||
1221 | } | |||
1222 | ||||
1223 | // Store mergable regions found. | |||
1224 | if (MergableCIs.size() > 1) { | |||
1225 | MergableCIsVector.push_back(MergableCIs); | |||
1226 | LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false) | |||
1227 | << " parallel regions in block " << BB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false) | |||
1228 | << " of function " << BB->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false) | |||
1229 | << "\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent ()->getName() << "\n";; } } while (false); | |||
1230 | } | |||
1231 | ||||
1232 | MergableCIs.clear(); | |||
1233 | } | |||
1234 | ||||
1235 | if (!MergableCIsVector.empty()) { | |||
1236 | Changed = true; | |||
1237 | ||||
1238 | for (auto &MergableCIs : MergableCIsVector) | |||
1239 | Merge(MergableCIs, BB); | |||
1240 | MergableCIsVector.clear(); | |||
1241 | } | |||
1242 | } | |||
1243 | ||||
1244 | if (Changed) { | |||
1245 | /// Re-collect use for fork calls, emitted barrier calls, and | |||
1246 | /// any emitted master/end_master calls. | |||
1247 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); | |||
1248 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); | |||
1249 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); | |||
1250 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); | |||
1251 | } | |||
1252 | ||||
1253 | return Changed; | |||
1254 | } | |||
1255 | ||||
1256 | /// Try to delete parallel regions if possible. | |||
1257 | bool deleteParallelRegions() { | |||
1258 | const unsigned CallbackCalleeOperand = 2; | |||
1259 | ||||
1260 | OMPInformationCache::RuntimeFunctionInfo &RFI = | |||
1261 | OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; | |||
1262 | ||||
1263 | if (!RFI.Declaration) | |||
1264 | return false; | |||
1265 | ||||
1266 | bool Changed = false; | |||
1267 | auto DeleteCallCB = [&](Use &U, Function &) { | |||
1268 | CallInst *CI = getCallIfRegularCall(U); | |||
1269 | if (!CI) | |||
1270 | return false; | |||
1271 | auto *Fn = dyn_cast<Function>( | |||
1272 | CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); | |||
1273 | if (!Fn) | |||
1274 | return false; | |||
1275 | if (!Fn->onlyReadsMemory()) | |||
1276 | return false; | |||
1277 | if (!Fn->hasFnAttribute(Attribute::WillReturn)) | |||
1278 | return false; | |||
1279 | ||||
1280 | LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Delete read-only parallel region in " << CI->getCaller()->getName() << "\n"; } } while (false) | |||
1281 | << CI->getCaller()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Delete read-only parallel region in " << CI->getCaller()->getName() << "\n"; } } while (false); | |||
1282 | ||||
1283 | auto Remark = [&](OptimizationRemark OR) { | |||
1284 | return OR << "Removing parallel region with no side-effects."; | |||
1285 | }; | |||
1286 | emitRemark<OptimizationRemark>(CI, "OMP160", Remark); | |||
1287 | ||||
1288 | CGUpdater.removeCallSite(*CI); | |||
1289 | CI->eraseFromParent(); | |||
1290 | Changed = true; | |||
1291 | ++NumOpenMPParallelRegionsDeleted; | |||
1292 | return true; | |||
1293 | }; | |||
1294 | ||||
1295 | RFI.foreachUse(SCC, DeleteCallCB); | |||
1296 | ||||
1297 | return Changed; | |||
1298 | } | |||
1299 | ||||
1300 | /// Try to eliminate runtime calls by reusing existing ones. | |||
1301 | bool deduplicateRuntimeCalls() { | |||
1302 | bool Changed = false; | |||
1303 | ||||
1304 | RuntimeFunction DeduplicableRuntimeCallIDs[] = { | |||
1305 | OMPRTL_omp_get_num_threads, | |||
1306 | OMPRTL_omp_in_parallel, | |||
1307 | OMPRTL_omp_get_cancellation, | |||
1308 | OMPRTL_omp_get_thread_limit, | |||
1309 | OMPRTL_omp_get_supported_active_levels, | |||
1310 | OMPRTL_omp_get_level, | |||
1311 | OMPRTL_omp_get_ancestor_thread_num, | |||
1312 | OMPRTL_omp_get_team_size, | |||
1313 | OMPRTL_omp_get_active_level, | |||
1314 | OMPRTL_omp_in_final, | |||
1315 | OMPRTL_omp_get_proc_bind, | |||
1316 | OMPRTL_omp_get_num_places, | |||
1317 | OMPRTL_omp_get_num_procs, | |||
1318 | OMPRTL_omp_get_place_num, | |||
1319 | OMPRTL_omp_get_partition_num_places, | |||
1320 | OMPRTL_omp_get_partition_place_nums}; | |||
1321 | ||||
1322 | // Global-tid is handled separately. | |||
1323 | SmallSetVector<Value *, 16> GTIdArgs; | |||
1324 | collectGlobalThreadIdArguments(GTIdArgs); | |||
1325 | LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << GTIdArgs.size() << " global thread ID arguments\n"; } } while (false) | |||
1326 | << " global thread ID arguments\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Found " << GTIdArgs.size() << " global thread ID arguments\n"; } } while (false); | |||
1327 | ||||
1328 | for (Function *F : SCC) { | |||
1329 | for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) | |||
1330 | Changed |= deduplicateRuntimeCalls( | |||
1331 | *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); | |||
1332 | ||||
1333 | // __kmpc_global_thread_num is special as we can replace it with an | |||
1334 | // argument in enough cases to make it worth trying. | |||
1335 | Value *GTIdArg = nullptr; | |||
1336 | for (Argument &Arg : F->args()) | |||
1337 | if (GTIdArgs.count(&Arg)) { | |||
1338 | GTIdArg = &Arg; | |||
1339 | break; | |||
1340 | } | |||
1341 | Changed |= deduplicateRuntimeCalls( | |||
1342 | *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); | |||
1343 | } | |||
1344 | ||||
1345 | return Changed; | |||
1346 | } | |||
1347 | ||||
1348 | /// Tries to hide the latency of runtime calls that involve host to | |||
1349 | /// device memory transfers by splitting them into their "issue" and "wait" | |||
1350 | /// versions. The "issue" is moved upwards as much as possible. The "wait" is | |||
1351 | /// moved downards as much as possible. The "issue" issues the memory transfer | |||
1352 | /// asynchronously, returning a handle. The "wait" waits in the returned | |||
1353 | /// handle for the memory transfer to finish. | |||
1354 | bool hideMemTransfersLatency() { | |||
1355 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper]; | |||
1356 | bool Changed = false; | |||
1357 | auto SplitMemTransfers = [&](Use &U, Function &Decl) { | |||
1358 | auto *RTCall = getCallIfRegularCall(U, &RFI); | |||
1359 | if (!RTCall) | |||
1360 | return false; | |||
1361 | ||||
1362 | OffloadArray OffloadArrays[3]; | |||
1363 | if (!getValuesInOffloadArrays(*RTCall, OffloadArrays)) | |||
1364 | return false; | |||
1365 | ||||
1366 | LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dumpValuesInOffloadArrays(OffloadArrays); } } while (false); | |||
1367 | ||||
1368 | // TODO: Check if can be moved upwards. | |||
1369 | bool WasSplit = false; | |||
1370 | Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); | |||
1371 | if (WaitMovementPoint) | |||
1372 | WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); | |||
1373 | ||||
1374 | Changed |= WasSplit; | |||
1375 | return WasSplit; | |||
1376 | }; | |||
1377 | RFI.foreachUse(SCC, SplitMemTransfers); | |||
1378 | ||||
1379 | return Changed; | |||
1380 | } | |||
1381 | ||||
1382 | void analysisGlobalization() { | |||
1383 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
1384 | ||||
1385 | auto CheckGlobalization = [&](Use &U, Function &Decl) { | |||
1386 | if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { | |||
1387 | auto Remark = [&](OptimizationRemarkMissed ORM) { | |||
1388 | return ORM | |||
1389 | << "Found thread data sharing on the GPU. " | |||
1390 | << "Expect degraded performance due to data globalization."; | |||
1391 | }; | |||
1392 | emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark); | |||
1393 | } | |||
1394 | ||||
1395 | return false; | |||
1396 | }; | |||
1397 | ||||
1398 | RFI.foreachUse(SCC, CheckGlobalization); | |||
1399 | } | |||
1400 | ||||
1401 | /// Maps the values stored in the offload arrays passed as arguments to | |||
1402 | /// \p RuntimeCall into the offload arrays in \p OAs. | |||
1403 | bool getValuesInOffloadArrays(CallInst &RuntimeCall, | |||
1404 | MutableArrayRef<OffloadArray> OAs) { | |||
1405 | assert(OAs.size() == 3 && "Need space for three offload arrays!")(static_cast <bool> (OAs.size() == 3 && "Need space for three offload arrays!" ) ? void (0) : __assert_fail ("OAs.size() == 3 && \"Need space for three offload arrays!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1405, __extension__ __PRETTY_FUNCTION__)); | |||
1406 | ||||
1407 | // A runtime call that involves memory offloading looks something like: | |||
1408 | // call void @__tgt_target_data_begin_mapper(arg0, arg1, | |||
1409 | // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, | |||
1410 | // ...) | |||
1411 | // So, the idea is to access the allocas that allocate space for these | |||
1412 | // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. | |||
1413 | // Therefore: | |||
1414 | // i8** %offload_baseptrs. | |||
1415 | Value *BasePtrsArg = | |||
1416 | RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum); | |||
1417 | // i8** %offload_ptrs. | |||
1418 | Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum); | |||
1419 | // i8** %offload_sizes. | |||
1420 | Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum); | |||
1421 | ||||
1422 | // Get values stored in **offload_baseptrs. | |||
1423 | auto *V = getUnderlyingObject(BasePtrsArg); | |||
1424 | if (!isa<AllocaInst>(V)) | |||
1425 | return false; | |||
1426 | auto *BasePtrsArray = cast<AllocaInst>(V); | |||
1427 | if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall)) | |||
1428 | return false; | |||
1429 | ||||
1430 | // Get values stored in **offload_baseptrs. | |||
1431 | V = getUnderlyingObject(PtrsArg); | |||
1432 | if (!isa<AllocaInst>(V)) | |||
1433 | return false; | |||
1434 | auto *PtrsArray = cast<AllocaInst>(V); | |||
1435 | if (!OAs[1].initialize(*PtrsArray, RuntimeCall)) | |||
1436 | return false; | |||
1437 | ||||
1438 | // Get values stored in **offload_sizes. | |||
1439 | V = getUnderlyingObject(SizesArg); | |||
1440 | // If it's a [constant] global array don't analyze it. | |||
1441 | if (isa<GlobalValue>(V)) | |||
1442 | return isa<Constant>(V); | |||
1443 | if (!isa<AllocaInst>(V)) | |||
1444 | return false; | |||
1445 | ||||
1446 | auto *SizesArray = cast<AllocaInst>(V); | |||
1447 | if (!OAs[2].initialize(*SizesArray, RuntimeCall)) | |||
1448 | return false; | |||
1449 | ||||
1450 | return true; | |||
1451 | } | |||
1452 | ||||
1453 | /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG. | |||
1454 | /// For now this is a way to test that the function getValuesInOffloadArrays | |||
1455 | /// is working properly. | |||
1456 | /// TODO: Move this to a unittest when unittests are available for OpenMPOpt. | |||
1457 | void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) { | |||
1458 | assert(OAs.size() == 3 && "There are three offload arrays to debug!")(static_cast <bool> (OAs.size() == 3 && "There are three offload arrays to debug!" ) ? void (0) : __assert_fail ("OAs.size() == 3 && \"There are three offload arrays to debug!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1458, __extension__ __PRETTY_FUNCTION__)); | |||
1459 | ||||
1460 | LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << " Successfully got offload values:\n" ; } } while (false); | |||
1461 | std::string ValuesStr; | |||
1462 | raw_string_ostream Printer(ValuesStr); | |||
1463 | std::string Separator = " --- "; | |||
1464 | ||||
1465 | for (auto *BP : OAs[0].StoredValues) { | |||
1466 | BP->print(Printer); | |||
1467 | Printer << Separator; | |||
1468 | } | |||
1469 | LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n"; } } while (false); | |||
1470 | ValuesStr.clear(); | |||
1471 | ||||
1472 | for (auto *P : OAs[1].StoredValues) { | |||
1473 | P->print(Printer); | |||
1474 | Printer << Separator; | |||
1475 | } | |||
1476 | LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n"; } } while (false); | |||
1477 | ValuesStr.clear(); | |||
1478 | ||||
1479 | for (auto *S : OAs[2].StoredValues) { | |||
1480 | S->print(Printer); | |||
1481 | Printer << Separator; | |||
1482 | } | |||
1483 | LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n"; } } while (false); | |||
1484 | } | |||
1485 | ||||
1486 | /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be | |||
1487 | /// moved. Returns nullptr if the movement is not possible, or not worth it. | |||
1488 | Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { | |||
1489 | // FIXME: This traverses only the BasicBlock where RuntimeCall is. | |||
1490 | // Make it traverse the CFG. | |||
1491 | ||||
1492 | Instruction *CurrentI = &RuntimeCall; | |||
1493 | bool IsWorthIt = false; | |||
1494 | while ((CurrentI = CurrentI->getNextNode())) { | |||
1495 | ||||
1496 | // TODO: Once we detect the regions to be offloaded we should use the | |||
1497 | // alias analysis manager to check if CurrentI may modify one of | |||
1498 | // the offloaded regions. | |||
1499 | if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { | |||
1500 | if (IsWorthIt) | |||
1501 | return CurrentI; | |||
1502 | ||||
1503 | return nullptr; | |||
1504 | } | |||
1505 | ||||
1506 | // FIXME: For now if we move it over anything without side effect | |||
1507 | // is worth it. | |||
1508 | IsWorthIt = true; | |||
1509 | } | |||
1510 | ||||
1511 | // Return end of BasicBlock. | |||
1512 | return RuntimeCall.getParent()->getTerminator(); | |||
1513 | } | |||
1514 | ||||
1515 | /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. | |||
1516 | bool splitTargetDataBeginRTC(CallInst &RuntimeCall, | |||
1517 | Instruction &WaitMovementPoint) { | |||
1518 | // Create stack allocated handle (__tgt_async_info) at the beginning of the | |||
1519 | // function. Used for storing information of the async transfer, allowing to | |||
1520 | // wait on it later. | |||
1521 | auto &IRBuilder = OMPInfoCache.OMPBuilder; | |||
1522 | auto *F = RuntimeCall.getCaller(); | |||
1523 | Instruction *FirstInst = &(F->getEntryBlock().front()); | |||
1524 | AllocaInst *Handle = new AllocaInst( | |||
1525 | IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst); | |||
1526 | ||||
1527 | // Add "issue" runtime call declaration: | |||
1528 | // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, | |||
1529 | // i8**, i8**, i64*, i64*) | |||
1530 | FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( | |||
1531 | M, OMPRTL___tgt_target_data_begin_mapper_issue); | |||
1532 | ||||
1533 | // Change RuntimeCall call site for its asynchronous version. | |||
1534 | SmallVector<Value *, 16> Args; | |||
1535 | for (auto &Arg : RuntimeCall.args()) | |||
1536 | Args.push_back(Arg.get()); | |||
1537 | Args.push_back(Handle); | |||
1538 | ||||
1539 | CallInst *IssueCallsite = | |||
1540 | CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall); | |||
1541 | RuntimeCall.eraseFromParent(); | |||
1542 | ||||
1543 | // Add "wait" runtime call declaration: | |||
1544 | // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) | |||
1545 | FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( | |||
1546 | M, OMPRTL___tgt_target_data_begin_mapper_wait); | |||
1547 | ||||
1548 | Value *WaitParams[2] = { | |||
1549 | IssueCallsite->getArgOperand( | |||
1550 | OffloadArray::DeviceIDArgNum), // device_id. | |||
1551 | Handle // handle to wait on. | |||
1552 | }; | |||
1553 | CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint); | |||
1554 | ||||
1555 | return true; | |||
1556 | } | |||
1557 | ||||
1558 | static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, | |||
1559 | bool GlobalOnly, bool &SingleChoice) { | |||
1560 | if (CurrentIdent == NextIdent) | |||
1561 | return CurrentIdent; | |||
1562 | ||||
1563 | // TODO: Figure out how to actually combine multiple debug locations. For | |||
1564 | // now we just keep an existing one if there is a single choice. | |||
1565 | if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { | |||
1566 | SingleChoice = !CurrentIdent; | |||
1567 | return NextIdent; | |||
1568 | } | |||
1569 | return nullptr; | |||
1570 | } | |||
1571 | ||||
1572 | /// Return an `struct ident_t*` value that represents the ones used in the | |||
1573 | /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not | |||
1574 | /// return a local `struct ident_t*`. For now, if we cannot find a suitable | |||
1575 | /// return value we create one from scratch. We also do not yet combine | |||
1576 | /// information, e.g., the source locations, see combinedIdentStruct. | |||
1577 | Value * | |||
1578 | getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, | |||
1579 | Function &F, bool GlobalOnly) { | |||
1580 | bool SingleChoice = true; | |||
1581 | Value *Ident = nullptr; | |||
1582 | auto CombineIdentStruct = [&](Use &U, Function &Caller) { | |||
1583 | CallInst *CI = getCallIfRegularCall(U, &RFI); | |||
1584 | if (!CI || &F != &Caller) | |||
1585 | return false; | |||
1586 | Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), | |||
1587 | /* GlobalOnly */ true, SingleChoice); | |||
1588 | return false; | |||
1589 | }; | |||
1590 | RFI.foreachUse(SCC, CombineIdentStruct); | |||
1591 | ||||
1592 | if (!Ident || !SingleChoice) { | |||
1593 | // The IRBuilder uses the insertion block to get to the module, this is | |||
1594 | // unfortunate but we work around it for now. | |||
1595 | if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) | |||
1596 | OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( | |||
1597 | &F.getEntryBlock(), F.getEntryBlock().begin())); | |||
1598 | // Create a fallback location if non was found. | |||
1599 | // TODO: Use the debug locations of the calls instead. | |||
1600 | Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); | |||
1601 | Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); | |||
1602 | } | |||
1603 | return Ident; | |||
1604 | } | |||
1605 | ||||
1606 | /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or | |||
1607 | /// \p ReplVal if given. | |||
1608 | bool deduplicateRuntimeCalls(Function &F, | |||
1609 | OMPInformationCache::RuntimeFunctionInfo &RFI, | |||
1610 | Value *ReplVal = nullptr) { | |||
1611 | auto *UV = RFI.getUseVector(F); | |||
1612 | if (!UV || UV->size() + (ReplVal != nullptr) < 2) | |||
1613 | return false; | |||
1614 | ||||
1615 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name << (ReplVal ? " with an existing value\n" : "\n") << "\n"; } } while (false) | |||
1616 | dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Namedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name << (ReplVal ? " with an existing value\n" : "\n") << "\n"; } } while (false) | |||
1617 | << (ReplVal ? " with an existing value\n" : "\n") << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name << (ReplVal ? " with an existing value\n" : "\n") << "\n"; } } while (false); | |||
1618 | ||||
1619 | assert((!ReplVal || (isa<Argument>(ReplVal) &&(static_cast <bool> ((!ReplVal || (isa<Argument>( ReplVal) && cast<Argument>(ReplVal)->getParent () == &F)) && "Unexpected replacement value!") ? void (0) : __assert_fail ("(!ReplVal || (isa<Argument>(ReplVal) && cast<Argument>(ReplVal)->getParent() == &F)) && \"Unexpected replacement value!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1621, __extension__ __PRETTY_FUNCTION__)) | |||
1620 | cast<Argument>(ReplVal)->getParent() == &F)) &&(static_cast <bool> ((!ReplVal || (isa<Argument>( ReplVal) && cast<Argument>(ReplVal)->getParent () == &F)) && "Unexpected replacement value!") ? void (0) : __assert_fail ("(!ReplVal || (isa<Argument>(ReplVal) && cast<Argument>(ReplVal)->getParent() == &F)) && \"Unexpected replacement value!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1621, __extension__ __PRETTY_FUNCTION__)) | |||
1621 | "Unexpected replacement value!")(static_cast <bool> ((!ReplVal || (isa<Argument>( ReplVal) && cast<Argument>(ReplVal)->getParent () == &F)) && "Unexpected replacement value!") ? void (0) : __assert_fail ("(!ReplVal || (isa<Argument>(ReplVal) && cast<Argument>(ReplVal)->getParent() == &F)) && \"Unexpected replacement value!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1621, __extension__ __PRETTY_FUNCTION__)); | |||
1622 | ||||
1623 | // TODO: Use dominance to find a good position instead. | |||
1624 | auto CanBeMoved = [this](CallBase &CB) { | |||
1625 | unsigned NumArgs = CB.arg_size(); | |||
1626 | if (NumArgs == 0) | |||
1627 | return true; | |||
1628 | if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) | |||
1629 | return false; | |||
1630 | for (unsigned U = 1; U < NumArgs; ++U) | |||
1631 | if (isa<Instruction>(CB.getArgOperand(U))) | |||
1632 | return false; | |||
1633 | return true; | |||
1634 | }; | |||
1635 | ||||
1636 | if (!ReplVal) { | |||
1637 | for (Use *U : *UV) | |||
1638 | if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { | |||
1639 | if (!CanBeMoved(*CI)) | |||
1640 | continue; | |||
1641 | ||||
1642 | // If the function is a kernel, dedup will move | |||
1643 | // the runtime call right after the kernel init callsite. Otherwise, | |||
1644 | // it will move it to the beginning of the caller function. | |||
1645 | if (isKernel(F)) { | |||
1646 | auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; | |||
1647 | auto *KernelInitUV = KernelInitRFI.getUseVector(F); | |||
1648 | ||||
1649 | if (KernelInitUV->empty()) | |||
1650 | continue; | |||
1651 | ||||
1652 | assert(KernelInitUV->size() == 1 &&(static_cast <bool> (KernelInitUV->size() == 1 && "Expected a single __kmpc_target_init in kernel\n") ? void ( 0) : __assert_fail ("KernelInitUV->size() == 1 && \"Expected a single __kmpc_target_init in kernel\\n\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1653, __extension__ __PRETTY_FUNCTION__)) | |||
1653 | "Expected a single __kmpc_target_init in kernel\n")(static_cast <bool> (KernelInitUV->size() == 1 && "Expected a single __kmpc_target_init in kernel\n") ? void ( 0) : __assert_fail ("KernelInitUV->size() == 1 && \"Expected a single __kmpc_target_init in kernel\\n\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1653, __extension__ __PRETTY_FUNCTION__)); | |||
1654 | ||||
1655 | CallInst *KernelInitCI = | |||
1656 | getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI); | |||
1657 | assert(KernelInitCI &&(static_cast <bool> (KernelInitCI && "Expected a call to __kmpc_target_init in kernel\n" ) ? void (0) : __assert_fail ("KernelInitCI && \"Expected a call to __kmpc_target_init in kernel\\n\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1658, __extension__ __PRETTY_FUNCTION__)) | |||
1658 | "Expected a call to __kmpc_target_init in kernel\n")(static_cast <bool> (KernelInitCI && "Expected a call to __kmpc_target_init in kernel\n" ) ? void (0) : __assert_fail ("KernelInitCI && \"Expected a call to __kmpc_target_init in kernel\\n\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1658, __extension__ __PRETTY_FUNCTION__)); | |||
1659 | ||||
1660 | CI->moveAfter(KernelInitCI); | |||
1661 | } else | |||
1662 | CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); | |||
1663 | ReplVal = CI; | |||
1664 | break; | |||
1665 | } | |||
1666 | if (!ReplVal) | |||
1667 | return false; | |||
1668 | } | |||
1669 | ||||
1670 | // If we use a call as a replacement value we need to make sure the ident is | |||
1671 | // valid at the new location. For now we just pick a global one, either | |||
1672 | // existing and used by one of the calls, or created from scratch. | |||
1673 | if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { | |||
1674 | if (!CI->arg_empty() && | |||
1675 | CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { | |||
1676 | Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, | |||
1677 | /* GlobalOnly */ true); | |||
1678 | CI->setArgOperand(0, Ident); | |||
1679 | } | |||
1680 | } | |||
1681 | ||||
1682 | bool Changed = false; | |||
1683 | auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { | |||
1684 | CallInst *CI = getCallIfRegularCall(U, &RFI); | |||
1685 | if (!CI || CI == ReplVal || &F != &Caller) | |||
1686 | return false; | |||
1687 | assert(CI->getCaller() == &F && "Unexpected call!")(static_cast <bool> (CI->getCaller() == &F && "Unexpected call!") ? void (0) : __assert_fail ("CI->getCaller() == &F && \"Unexpected call!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 1687, __extension__ __PRETTY_FUNCTION__)); | |||
1688 | ||||
1689 | auto Remark = [&](OptimizationRemark OR) { | |||
1690 | return OR << "OpenMP runtime call " | |||
1691 | << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated."; | |||
1692 | }; | |||
1693 | if (CI->getDebugLoc()) | |||
1694 | emitRemark<OptimizationRemark>(CI, "OMP170", Remark); | |||
1695 | else | |||
1696 | emitRemark<OptimizationRemark>(&F, "OMP170", Remark); | |||
1697 | ||||
1698 | CGUpdater.removeCallSite(*CI); | |||
1699 | CI->replaceAllUsesWith(ReplVal); | |||
1700 | CI->eraseFromParent(); | |||
1701 | ++NumOpenMPRuntimeCallsDeduplicated; | |||
1702 | Changed = true; | |||
1703 | return true; | |||
1704 | }; | |||
1705 | RFI.foreachUse(SCC, ReplaceAndDeleteCB); | |||
1706 | ||||
1707 | return Changed; | |||
1708 | } | |||
1709 | ||||
1710 | /// Collect arguments that represent the global thread id in \p GTIdArgs. | |||
1711 | void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { | |||
1712 | // TODO: Below we basically perform a fixpoint iteration with a pessimistic | |||
1713 | // initialization. We could define an AbstractAttribute instead and | |||
1714 | // run the Attributor here once it can be run as an SCC pass. | |||
1715 | ||||
1716 | // Helper to check the argument \p ArgNo at all call sites of \p F for | |||
1717 | // a GTId. | |||
1718 | auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { | |||
1719 | if (!F.hasLocalLinkage()) | |||
1720 | return false; | |||
1721 | for (Use &U : F.uses()) { | |||
1722 | if (CallInst *CI = getCallIfRegularCall(U)) { | |||
1723 | Value *ArgOp = CI->getArgOperand(ArgNo); | |||
1724 | if (CI == &RefCI || GTIdArgs.count(ArgOp) || | |||
1725 | getCallIfRegularCall( | |||
1726 | *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) | |||
1727 | continue; | |||
1728 | } | |||
1729 | return false; | |||
1730 | } | |||
1731 | return true; | |||
1732 | }; | |||
1733 | ||||
1734 | // Helper to identify uses of a GTId as GTId arguments. | |||
1735 | auto AddUserArgs = [&](Value >Id) { | |||
1736 | for (Use &U : GTId.uses()) | |||
1737 | if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) | |||
1738 | if (CI->isArgOperand(&U)) | |||
1739 | if (Function *Callee = CI->getCalledFunction()) | |||
1740 | if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) | |||
1741 | GTIdArgs.insert(Callee->getArg(U.getOperandNo())); | |||
1742 | }; | |||
1743 | ||||
1744 | // The argument users of __kmpc_global_thread_num calls are GTIds. | |||
1745 | OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = | |||
1746 | OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; | |||
1747 | ||||
1748 | GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { | |||
1749 | if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) | |||
1750 | AddUserArgs(*CI); | |||
1751 | return false; | |||
1752 | }); | |||
1753 | ||||
1754 | // Transitively search for more arguments by looking at the users of the | |||
1755 | // ones we know already. During the search the GTIdArgs vector is extended | |||
1756 | // so we cannot cache the size nor can we use a range based for. | |||
1757 | for (unsigned U = 0; U < GTIdArgs.size(); ++U) | |||
1758 | AddUserArgs(*GTIdArgs[U]); | |||
1759 | } | |||
1760 | ||||
1761 | /// Kernel (=GPU) optimizations and utility functions | |||
1762 | /// | |||
1763 | ///{{ | |||
1764 | ||||
1765 | /// Check if \p F is a kernel, hence entry point for target offloading. | |||
1766 | bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } | |||
1767 | ||||
1768 | /// Cache to remember the unique kernel for a function. | |||
1769 | DenseMap<Function *, Optional<Kernel>> UniqueKernelMap; | |||
1770 | ||||
1771 | /// Find the unique kernel that will execute \p F, if any. | |||
1772 | Kernel getUniqueKernelFor(Function &F); | |||
1773 | ||||
1774 | /// Find the unique kernel that will execute \p I, if any. | |||
1775 | Kernel getUniqueKernelFor(Instruction &I) { | |||
1776 | return getUniqueKernelFor(*I.getFunction()); | |||
1777 | } | |||
1778 | ||||
1779 | /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in | |||
1780 | /// the cases we can avoid taking the address of a function. | |||
1781 | bool rewriteDeviceCodeStateMachine(); | |||
1782 | ||||
1783 | /// | |||
1784 | ///}} | |||
1785 | ||||
1786 | /// Emit a remark generically | |||
1787 | /// | |||
1788 | /// This template function can be used to generically emit a remark. The | |||
1789 | /// RemarkKind should be one of the following: | |||
1790 | /// - OptimizationRemark to indicate a successful optimization attempt | |||
1791 | /// - OptimizationRemarkMissed to report a failed optimization attempt | |||
1792 | /// - OptimizationRemarkAnalysis to provide additional information about an | |||
1793 | /// optimization attempt | |||
1794 | /// | |||
1795 | /// The remark is built using a callback function provided by the caller that | |||
1796 | /// takes a RemarkKind as input and returns a RemarkKind. | |||
1797 | template <typename RemarkKind, typename RemarkCallBack> | |||
1798 | void emitRemark(Instruction *I, StringRef RemarkName, | |||
1799 | RemarkCallBack &&RemarkCB) const { | |||
1800 | Function *F = I->getParent()->getParent(); | |||
1801 | auto &ORE = OREGetter(F); | |||
1802 | ||||
1803 | if (RemarkName.startswith("OMP")) | |||
1804 | ORE.emit([&]() { | |||
1805 | return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)) | |||
1806 | << " [" << RemarkName << "]"; | |||
1807 | }); | |||
1808 | else | |||
1809 | ORE.emit( | |||
1810 | [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)); }); | |||
1811 | } | |||
1812 | ||||
1813 | /// Emit a remark on a function. | |||
1814 | template <typename RemarkKind, typename RemarkCallBack> | |||
1815 | void emitRemark(Function *F, StringRef RemarkName, | |||
1816 | RemarkCallBack &&RemarkCB) const { | |||
1817 | auto &ORE = OREGetter(F); | |||
1818 | ||||
1819 | if (RemarkName.startswith("OMP")) | |||
1820 | ORE.emit([&]() { | |||
1821 | return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)) | |||
1822 | << " [" << RemarkName << "]"; | |||
1823 | }); | |||
1824 | else | |||
1825 | ORE.emit( | |||
1826 | [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)); }); | |||
1827 | } | |||
1828 | ||||
1829 | /// RAII struct to temporarily change an RTL function's linkage to external. | |||
1830 | /// This prevents it from being mistakenly removed by other optimizations. | |||
1831 | struct ExternalizationRAII { | |||
1832 | ExternalizationRAII(OMPInformationCache &OMPInfoCache, | |||
1833 | RuntimeFunction RFKind) | |||
1834 | : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) { | |||
1835 | if (!Declaration) | |||
1836 | return; | |||
1837 | ||||
1838 | LinkageType = Declaration->getLinkage(); | |||
1839 | Declaration->setLinkage(GlobalValue::ExternalLinkage); | |||
1840 | } | |||
1841 | ||||
1842 | ~ExternalizationRAII() { | |||
1843 | if (!Declaration) | |||
1844 | return; | |||
1845 | ||||
1846 | Declaration->setLinkage(LinkageType); | |||
1847 | } | |||
1848 | ||||
1849 | Function *Declaration; | |||
1850 | GlobalValue::LinkageTypes LinkageType; | |||
1851 | }; | |||
1852 | ||||
1853 | /// The underlying module. | |||
1854 | Module &M; | |||
1855 | ||||
1856 | /// The SCC we are operating on. | |||
1857 | SmallVectorImpl<Function *> &SCC; | |||
1858 | ||||
1859 | /// Callback to update the call graph, the first argument is a removed call, | |||
1860 | /// the second an optional replacement call. | |||
1861 | CallGraphUpdater &CGUpdater; | |||
1862 | ||||
1863 | /// Callback to get an OptimizationRemarkEmitter from a Function * | |||
1864 | OptimizationRemarkGetter OREGetter; | |||
1865 | ||||
1866 | /// OpenMP-specific information cache. Also used for Attributor runs. | |||
1867 | OMPInformationCache &OMPInfoCache; | |||
1868 | ||||
1869 | /// Attributor instance. | |||
1870 | Attributor &A; | |||
1871 | ||||
1872 | /// Helper function to run Attributor on SCC. | |||
1873 | bool runAttributor(bool IsModulePass) { | |||
1874 | if (SCC.empty()) | |||
1875 | return false; | |||
1876 | ||||
1877 | // Temporarily make these function have external linkage so the Attributor | |||
1878 | // doesn't remove them when we try to look them up later. | |||
1879 | ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel); | |||
1880 | ExternalizationRAII EndParallel(OMPInfoCache, | |||
1881 | OMPRTL___kmpc_kernel_end_parallel); | |||
1882 | ExternalizationRAII BarrierSPMD(OMPInfoCache, | |||
1883 | OMPRTL___kmpc_barrier_simple_spmd); | |||
1884 | ExternalizationRAII BarrierGeneric(OMPInfoCache, | |||
1885 | OMPRTL___kmpc_barrier_simple_generic); | |||
1886 | ExternalizationRAII ThreadId(OMPInfoCache, | |||
1887 | OMPRTL___kmpc_get_hardware_thread_id_in_block); | |||
1888 | ||||
1889 | registerAAs(IsModulePass); | |||
1890 | ||||
1891 | ChangeStatus Changed = A.run(); | |||
1892 | ||||
1893 | LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "[Attributor] Done with " << SCC.size() << " functions, result: " << Changed << ".\n"; } } while (false) | |||
1894 | << " functions, result: " << Changed << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << "[Attributor] Done with " << SCC.size() << " functions, result: " << Changed << ".\n"; } } while (false); | |||
1895 | ||||
1896 | return Changed == ChangeStatus::CHANGED; | |||
1897 | } | |||
1898 | ||||
1899 | void registerFoldRuntimeCall(RuntimeFunction RF); | |||
1900 | ||||
1901 | /// Populate the Attributor with abstract attribute opportunities in the | |||
1902 | /// function. | |||
1903 | void registerAAs(bool IsModulePass); | |||
1904 | }; | |||
1905 | ||||
1906 | Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { | |||
1907 | if (!OMPInfoCache.ModuleSlice.count(&F)) | |||
1908 | return nullptr; | |||
1909 | ||||
1910 | // Use a scope to keep the lifetime of the CachedKernel short. | |||
1911 | { | |||
1912 | Optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; | |||
1913 | if (CachedKernel) | |||
1914 | return *CachedKernel; | |||
1915 | ||||
1916 | // TODO: We should use an AA to create an (optimistic and callback | |||
1917 | // call-aware) call graph. For now we stick to simple patterns that | |||
1918 | // are less powerful, basically the worst fixpoint. | |||
1919 | if (isKernel(F)) { | |||
1920 | CachedKernel = Kernel(&F); | |||
1921 | return *CachedKernel; | |||
1922 | } | |||
1923 | ||||
1924 | CachedKernel = nullptr; | |||
1925 | if (!F.hasLocalLinkage()) { | |||
1926 | ||||
1927 | // See https://openmp.llvm.org/remarks/OptimizationRemarks.html | |||
1928 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
1929 | return ORA << "Potentially unknown OpenMP target region caller."; | |||
1930 | }; | |||
1931 | emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark); | |||
1932 | ||||
1933 | return nullptr; | |||
1934 | } | |||
1935 | } | |||
1936 | ||||
1937 | auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { | |||
1938 | if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { | |||
1939 | // Allow use in equality comparisons. | |||
1940 | if (Cmp->isEquality()) | |||
1941 | return getUniqueKernelFor(*Cmp); | |||
1942 | return nullptr; | |||
1943 | } | |||
1944 | if (auto *CB = dyn_cast<CallBase>(U.getUser())) { | |||
1945 | // Allow direct calls. | |||
1946 | if (CB->isCallee(&U)) | |||
1947 | return getUniqueKernelFor(*CB); | |||
1948 | ||||
1949 | OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = | |||
1950 | OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; | |||
1951 | // Allow the use in __kmpc_parallel_51 calls. | |||
1952 | if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI)) | |||
1953 | return getUniqueKernelFor(*CB); | |||
1954 | return nullptr; | |||
1955 | } | |||
1956 | // Disallow every other use. | |||
1957 | return nullptr; | |||
1958 | }; | |||
1959 | ||||
1960 | // TODO: In the future we want to track more than just a unique kernel. | |||
1961 | SmallPtrSet<Kernel, 2> PotentialKernels; | |||
1962 | OMPInformationCache::foreachUse(F, [&](const Use &U) { | |||
1963 | PotentialKernels.insert(GetUniqueKernelForUse(U)); | |||
1964 | }); | |||
1965 | ||||
1966 | Kernel K = nullptr; | |||
1967 | if (PotentialKernels.size() == 1) | |||
1968 | K = *PotentialKernels.begin(); | |||
1969 | ||||
1970 | // Cache the result. | |||
1971 | UniqueKernelMap[&F] = K; | |||
1972 | ||||
1973 | return K; | |||
1974 | } | |||
1975 | ||||
1976 | bool OpenMPOpt::rewriteDeviceCodeStateMachine() { | |||
1977 | OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = | |||
1978 | OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; | |||
1979 | ||||
1980 | bool Changed = false; | |||
1981 | if (!KernelParallelRFI) | |||
1982 | return Changed; | |||
1983 | ||||
1984 | // If we have disabled state machine changes, exit | |||
1985 | if (DisableOpenMPOptStateMachineRewrite) | |||
1986 | return Changed; | |||
1987 | ||||
1988 | for (Function *F : SCC) { | |||
1989 | ||||
1990 | // Check if the function is a use in a __kmpc_parallel_51 call at | |||
1991 | // all. | |||
1992 | bool UnknownUse = false; | |||
1993 | bool KernelParallelUse = false; | |||
1994 | unsigned NumDirectCalls = 0; | |||
1995 | ||||
1996 | SmallVector<Use *, 2> ToBeReplacedStateMachineUses; | |||
1997 | OMPInformationCache::foreachUse(*F, [&](Use &U) { | |||
1998 | if (auto *CB = dyn_cast<CallBase>(U.getUser())) | |||
1999 | if (CB->isCallee(&U)) { | |||
2000 | ++NumDirectCalls; | |||
2001 | return; | |||
2002 | } | |||
2003 | ||||
2004 | if (isa<ICmpInst>(U.getUser())) { | |||
2005 | ToBeReplacedStateMachineUses.push_back(&U); | |||
2006 | return; | |||
2007 | } | |||
2008 | ||||
2009 | // Find wrapper functions that represent parallel kernels. | |||
2010 | CallInst *CI = | |||
2011 | OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI); | |||
2012 | const unsigned int WrapperFunctionArgNo = 6; | |||
2013 | if (!KernelParallelUse && CI && | |||
2014 | CI->getArgOperandNo(&U) == WrapperFunctionArgNo) { | |||
2015 | KernelParallelUse = true; | |||
2016 | ToBeReplacedStateMachineUses.push_back(&U); | |||
2017 | return; | |||
2018 | } | |||
2019 | UnknownUse = true; | |||
2020 | }); | |||
2021 | ||||
2022 | // Do not emit a remark if we haven't seen a __kmpc_parallel_51 | |||
2023 | // use. | |||
2024 | if (!KernelParallelUse) | |||
2025 | continue; | |||
2026 | ||||
2027 | // If this ever hits, we should investigate. | |||
2028 | // TODO: Checking the number of uses is not a necessary restriction and | |||
2029 | // should be lifted. | |||
2030 | if (UnknownUse || NumDirectCalls != 1 || | |||
2031 | ToBeReplacedStateMachineUses.size() > 2) { | |||
2032 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
2033 | return ORA << "Parallel region is used in " | |||
2034 | << (UnknownUse ? "unknown" : "unexpected") | |||
2035 | << " ways. Will not attempt to rewrite the state machine."; | |||
2036 | }; | |||
2037 | emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark); | |||
2038 | continue; | |||
2039 | } | |||
2040 | ||||
2041 | // Even if we have __kmpc_parallel_51 calls, we (for now) give | |||
2042 | // up if the function is not called from a unique kernel. | |||
2043 | Kernel K = getUniqueKernelFor(*F); | |||
2044 | if (!K) { | |||
2045 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
2046 | return ORA << "Parallel region is not called from a unique kernel. " | |||
2047 | "Will not attempt to rewrite the state machine."; | |||
2048 | }; | |||
2049 | emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark); | |||
2050 | continue; | |||
2051 | } | |||
2052 | ||||
2053 | // We now know F is a parallel body function called only from the kernel K. | |||
2054 | // We also identified the state machine uses in which we replace the | |||
2055 | // function pointer by a new global symbol for identification purposes. This | |||
2056 | // ensures only direct calls to the function are left. | |||
2057 | ||||
2058 | Module &M = *F->getParent(); | |||
2059 | Type *Int8Ty = Type::getInt8Ty(M.getContext()); | |||
2060 | ||||
2061 | auto *ID = new GlobalVariable( | |||
2062 | M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, | |||
2063 | UndefValue::get(Int8Ty), F->getName() + ".ID"); | |||
2064 | ||||
2065 | for (Use *U : ToBeReplacedStateMachineUses) | |||
2066 | U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast( | |||
2067 | ID, U->get()->getType())); | |||
2068 | ||||
2069 | ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; | |||
2070 | ||||
2071 | Changed = true; | |||
2072 | } | |||
2073 | ||||
2074 | return Changed; | |||
2075 | } | |||
2076 | ||||
2077 | /// Abstract Attribute for tracking ICV values. | |||
2078 | struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> { | |||
2079 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | |||
2080 | AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
2081 | ||||
2082 | void initialize(Attributor &A) override { | |||
2083 | Function *F = getAnchorScope(); | |||
2084 | if (!F || !A.isFunctionIPOAmendable(*F)) | |||
2085 | indicatePessimisticFixpoint(); | |||
2086 | } | |||
2087 | ||||
2088 | /// Returns true if value is assumed to be tracked. | |||
2089 | bool isAssumedTracked() const { return getAssumed(); } | |||
2090 | ||||
2091 | /// Returns true if value is known to be tracked. | |||
2092 | bool isKnownTracked() const { return getAssumed(); } | |||
2093 | ||||
2094 | /// Create an abstract attribute biew for the position \p IRP. | |||
2095 | static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); | |||
2096 | ||||
2097 | /// Return the value with which \p I can be replaced for specific \p ICV. | |||
2098 | virtual Optional<Value *> getReplacementValue(InternalControlVar ICV, | |||
2099 | const Instruction *I, | |||
2100 | Attributor &A) const { | |||
2101 | return None; | |||
2102 | } | |||
2103 | ||||
2104 | /// Return an assumed unique ICV value if a single candidate is found. If | |||
2105 | /// there cannot be one, return a nullptr. If it is not clear yet, return the | |||
2106 | /// Optional::NoneType. | |||
2107 | virtual Optional<Value *> | |||
2108 | getUniqueReplacementValue(InternalControlVar ICV) const = 0; | |||
2109 | ||||
2110 | // Currently only nthreads is being tracked. | |||
2111 | // this array will only grow with time. | |||
2112 | InternalControlVar TrackableICVs[1] = {ICV_nthreads}; | |||
2113 | ||||
2114 | /// See AbstractAttribute::getName() | |||
2115 | const std::string getName() const override { return "AAICVTracker"; } | |||
2116 | ||||
2117 | /// See AbstractAttribute::getIdAddr() | |||
2118 | const char *getIdAddr() const override { return &ID; } | |||
2119 | ||||
2120 | /// This function should return true if the type of the \p AA is AAICVTracker | |||
2121 | static bool classof(const AbstractAttribute *AA) { | |||
2122 | return (AA->getIdAddr() == &ID); | |||
2123 | } | |||
2124 | ||||
2125 | static const char ID; | |||
2126 | }; | |||
2127 | ||||
2128 | struct AAICVTrackerFunction : public AAICVTracker { | |||
2129 | AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) | |||
2130 | : AAICVTracker(IRP, A) {} | |||
2131 | ||||
2132 | // FIXME: come up with better string. | |||
2133 | const std::string getAsStr() const override { return "ICVTrackerFunction"; } | |||
2134 | ||||
2135 | // FIXME: come up with some stats. | |||
2136 | void trackStatistics() const override {} | |||
2137 | ||||
2138 | /// We don't manifest anything for this AA. | |||
2139 | ChangeStatus manifest(Attributor &A) override { | |||
2140 | return ChangeStatus::UNCHANGED; | |||
2141 | } | |||
2142 | ||||
2143 | // Map of ICV to their values at specific program point. | |||
2144 | EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar, | |||
2145 | InternalControlVar::ICV___last> | |||
2146 | ICVReplacementValuesMap; | |||
2147 | ||||
2148 | ChangeStatus updateImpl(Attributor &A) override { | |||
2149 | ChangeStatus HasChanged = ChangeStatus::UNCHANGED; | |||
2150 | ||||
2151 | Function *F = getAnchorScope(); | |||
2152 | ||||
2153 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2154 | ||||
2155 | for (InternalControlVar ICV : TrackableICVs) { | |||
2156 | auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; | |||
2157 | ||||
2158 | auto &ValuesMap = ICVReplacementValuesMap[ICV]; | |||
2159 | auto TrackValues = [&](Use &U, Function &) { | |||
2160 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); | |||
2161 | if (!CI) | |||
2162 | return false; | |||
2163 | ||||
2164 | // FIXME: handle setters with more that 1 arguments. | |||
2165 | /// Track new value. | |||
2166 | if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second) | |||
2167 | HasChanged = ChangeStatus::CHANGED; | |||
2168 | ||||
2169 | return false; | |||
2170 | }; | |||
2171 | ||||
2172 | auto CallCheck = [&](Instruction &I) { | |||
2173 | Optional<Value *> ReplVal = getValueForCall(A, &I, ICV); | |||
2174 | if (ReplVal.hasValue() && | |||
2175 | ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) | |||
2176 | HasChanged = ChangeStatus::CHANGED; | |||
2177 | ||||
2178 | return true; | |||
2179 | }; | |||
2180 | ||||
2181 | // Track all changes of an ICV. | |||
2182 | SetterRFI.foreachUse(TrackValues, F); | |||
2183 | ||||
2184 | bool UsedAssumedInformation = false; | |||
2185 | A.checkForAllInstructions(CallCheck, *this, {Instruction::Call}, | |||
2186 | UsedAssumedInformation, | |||
2187 | /* CheckBBLivenessOnly */ true); | |||
2188 | ||||
2189 | /// TODO: Figure out a way to avoid adding entry in | |||
2190 | /// ICVReplacementValuesMap | |||
2191 | Instruction *Entry = &F->getEntryBlock().front(); | |||
2192 | if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry)) | |||
2193 | ValuesMap.insert(std::make_pair(Entry, nullptr)); | |||
2194 | } | |||
2195 | ||||
2196 | return HasChanged; | |||
2197 | } | |||
2198 | ||||
2199 | /// Hepler to check if \p I is a call and get the value for it if it is | |||
2200 | /// unique. | |||
2201 | Optional<Value *> getValueForCall(Attributor &A, const Instruction *I, | |||
2202 | InternalControlVar &ICV) const { | |||
2203 | ||||
2204 | const auto *CB = dyn_cast<CallBase>(I); | |||
2205 | if (!CB || CB->hasFnAttr("no_openmp") || | |||
2206 | CB->hasFnAttr("no_openmp_routines")) | |||
2207 | return None; | |||
2208 | ||||
2209 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2210 | auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; | |||
2211 | auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; | |||
2212 | Function *CalledFunction = CB->getCalledFunction(); | |||
2213 | ||||
2214 | // Indirect call, assume ICV changes. | |||
2215 | if (CalledFunction == nullptr) | |||
2216 | return nullptr; | |||
2217 | if (CalledFunction == GetterRFI.Declaration) | |||
2218 | return None; | |||
2219 | if (CalledFunction == SetterRFI.Declaration) { | |||
2220 | if (ICVReplacementValuesMap[ICV].count(I)) | |||
2221 | return ICVReplacementValuesMap[ICV].lookup(I); | |||
2222 | ||||
2223 | return nullptr; | |||
2224 | } | |||
2225 | ||||
2226 | // Since we don't know, assume it changes the ICV. | |||
2227 | if (CalledFunction->isDeclaration()) | |||
2228 | return nullptr; | |||
2229 | ||||
2230 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
2231 | *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED); | |||
2232 | ||||
2233 | if (ICVTrackingAA.isAssumedTracked()) | |||
2234 | return ICVTrackingAA.getUniqueReplacementValue(ICV); | |||
2235 | ||||
2236 | // If we don't know, assume it changes. | |||
2237 | return nullptr; | |||
2238 | } | |||
2239 | ||||
2240 | // We don't check unique value for a function, so return None. | |||
2241 | Optional<Value *> | |||
2242 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
2243 | return None; | |||
2244 | } | |||
2245 | ||||
2246 | /// Return the value with which \p I can be replaced for specific \p ICV. | |||
2247 | Optional<Value *> getReplacementValue(InternalControlVar ICV, | |||
2248 | const Instruction *I, | |||
2249 | Attributor &A) const override { | |||
2250 | const auto &ValuesMap = ICVReplacementValuesMap[ICV]; | |||
2251 | if (ValuesMap.count(I)) | |||
2252 | return ValuesMap.lookup(I); | |||
2253 | ||||
2254 | SmallVector<const Instruction *, 16> Worklist; | |||
2255 | SmallPtrSet<const Instruction *, 16> Visited; | |||
2256 | Worklist.push_back(I); | |||
2257 | ||||
2258 | Optional<Value *> ReplVal; | |||
2259 | ||||
2260 | while (!Worklist.empty()) { | |||
2261 | const Instruction *CurrInst = Worklist.pop_back_val(); | |||
2262 | if (!Visited.insert(CurrInst).second) | |||
2263 | continue; | |||
2264 | ||||
2265 | const BasicBlock *CurrBB = CurrInst->getParent(); | |||
2266 | ||||
2267 | // Go up and look for all potential setters/calls that might change the | |||
2268 | // ICV. | |||
2269 | while ((CurrInst = CurrInst->getPrevNode())) { | |||
2270 | if (ValuesMap.count(CurrInst)) { | |||
2271 | Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); | |||
2272 | // Unknown value, track new. | |||
2273 | if (!ReplVal.hasValue()) { | |||
2274 | ReplVal = NewReplVal; | |||
2275 | break; | |||
2276 | } | |||
2277 | ||||
2278 | // If we found a new value, we can't know the icv value anymore. | |||
2279 | if (NewReplVal.hasValue()) | |||
2280 | if (ReplVal != NewReplVal) | |||
2281 | return nullptr; | |||
2282 | ||||
2283 | break; | |||
2284 | } | |||
2285 | ||||
2286 | Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV); | |||
2287 | if (!NewReplVal.hasValue()) | |||
2288 | continue; | |||
2289 | ||||
2290 | // Unknown value, track new. | |||
2291 | if (!ReplVal.hasValue()) { | |||
2292 | ReplVal = NewReplVal; | |||
2293 | break; | |||
2294 | } | |||
2295 | ||||
2296 | // if (NewReplVal.hasValue()) | |||
2297 | // We found a new value, we can't know the icv value anymore. | |||
2298 | if (ReplVal != NewReplVal) | |||
2299 | return nullptr; | |||
2300 | } | |||
2301 | ||||
2302 | // If we are in the same BB and we have a value, we are done. | |||
2303 | if (CurrBB == I->getParent() && ReplVal.hasValue()) | |||
2304 | return ReplVal; | |||
2305 | ||||
2306 | // Go through all predecessors and add terminators for analysis. | |||
2307 | for (const BasicBlock *Pred : predecessors(CurrBB)) | |||
2308 | if (const Instruction *Terminator = Pred->getTerminator()) | |||
2309 | Worklist.push_back(Terminator); | |||
2310 | } | |||
2311 | ||||
2312 | return ReplVal; | |||
2313 | } | |||
2314 | }; | |||
2315 | ||||
2316 | struct AAICVTrackerFunctionReturned : AAICVTracker { | |||
2317 | AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) | |||
2318 | : AAICVTracker(IRP, A) {} | |||
2319 | ||||
2320 | // FIXME: come up with better string. | |||
2321 | const std::string getAsStr() const override { | |||
2322 | return "ICVTrackerFunctionReturned"; | |||
2323 | } | |||
2324 | ||||
2325 | // FIXME: come up with some stats. | |||
2326 | void trackStatistics() const override {} | |||
2327 | ||||
2328 | /// We don't manifest anything for this AA. | |||
2329 | ChangeStatus manifest(Attributor &A) override { | |||
2330 | return ChangeStatus::UNCHANGED; | |||
2331 | } | |||
2332 | ||||
2333 | // Map of ICV to their values at specific program point. | |||
2334 | EnumeratedArray<Optional<Value *>, InternalControlVar, | |||
2335 | InternalControlVar::ICV___last> | |||
2336 | ICVReplacementValuesMap; | |||
2337 | ||||
2338 | /// Return the value with which \p I can be replaced for specific \p ICV. | |||
2339 | Optional<Value *> | |||
2340 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
2341 | return ICVReplacementValuesMap[ICV]; | |||
2342 | } | |||
2343 | ||||
2344 | ChangeStatus updateImpl(Attributor &A) override { | |||
2345 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
2346 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
2347 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
2348 | ||||
2349 | if (!ICVTrackingAA.isAssumedTracked()) | |||
2350 | return indicatePessimisticFixpoint(); | |||
2351 | ||||
2352 | for (InternalControlVar ICV : TrackableICVs) { | |||
2353 | Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; | |||
2354 | Optional<Value *> UniqueICVValue; | |||
2355 | ||||
2356 | auto CheckReturnInst = [&](Instruction &I) { | |||
2357 | Optional<Value *> NewReplVal = | |||
2358 | ICVTrackingAA.getReplacementValue(ICV, &I, A); | |||
2359 | ||||
2360 | // If we found a second ICV value there is no unique returned value. | |||
2361 | if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) | |||
2362 | return false; | |||
2363 | ||||
2364 | UniqueICVValue = NewReplVal; | |||
2365 | ||||
2366 | return true; | |||
2367 | }; | |||
2368 | ||||
2369 | bool UsedAssumedInformation = false; | |||
2370 | if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, | |||
2371 | UsedAssumedInformation, | |||
2372 | /* CheckBBLivenessOnly */ true)) | |||
2373 | UniqueICVValue = nullptr; | |||
2374 | ||||
2375 | if (UniqueICVValue == ReplVal) | |||
2376 | continue; | |||
2377 | ||||
2378 | ReplVal = UniqueICVValue; | |||
2379 | Changed = ChangeStatus::CHANGED; | |||
2380 | } | |||
2381 | ||||
2382 | return Changed; | |||
2383 | } | |||
2384 | }; | |||
2385 | ||||
2386 | struct AAICVTrackerCallSite : AAICVTracker { | |||
2387 | AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) | |||
2388 | : AAICVTracker(IRP, A) {} | |||
2389 | ||||
2390 | void initialize(Attributor &A) override { | |||
2391 | Function *F = getAnchorScope(); | |||
2392 | if (!F || !A.isFunctionIPOAmendable(*F)) | |||
2393 | indicatePessimisticFixpoint(); | |||
2394 | ||||
2395 | // We only initialize this AA for getters, so we need to know which ICV it | |||
2396 | // gets. | |||
2397 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2398 | for (InternalControlVar ICV : TrackableICVs) { | |||
2399 | auto ICVInfo = OMPInfoCache.ICVs[ICV]; | |||
2400 | auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; | |||
2401 | if (Getter.Declaration == getAssociatedFunction()) { | |||
2402 | AssociatedICV = ICVInfo.Kind; | |||
2403 | return; | |||
2404 | } | |||
2405 | } | |||
2406 | ||||
2407 | /// Unknown ICV. | |||
2408 | indicatePessimisticFixpoint(); | |||
2409 | } | |||
2410 | ||||
2411 | ChangeStatus manifest(Attributor &A) override { | |||
2412 | if (!ReplVal.hasValue() || !ReplVal.getValue()) | |||
2413 | return ChangeStatus::UNCHANGED; | |||
2414 | ||||
2415 | A.changeValueAfterManifest(*getCtxI(), **ReplVal); | |||
2416 | A.deleteAfterManifest(*getCtxI()); | |||
2417 | ||||
2418 | return ChangeStatus::CHANGED; | |||
2419 | } | |||
2420 | ||||
2421 | // FIXME: come up with better string. | |||
2422 | const std::string getAsStr() const override { return "ICVTrackerCallSite"; } | |||
2423 | ||||
2424 | // FIXME: come up with some stats. | |||
2425 | void trackStatistics() const override {} | |||
2426 | ||||
2427 | InternalControlVar AssociatedICV; | |||
2428 | Optional<Value *> ReplVal; | |||
2429 | ||||
2430 | ChangeStatus updateImpl(Attributor &A) override { | |||
2431 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
2432 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
2433 | ||||
2434 | // We don't have any information, so we assume it changes the ICV. | |||
2435 | if (!ICVTrackingAA.isAssumedTracked()) | |||
2436 | return indicatePessimisticFixpoint(); | |||
2437 | ||||
2438 | Optional<Value *> NewReplVal = | |||
2439 | ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A); | |||
2440 | ||||
2441 | if (ReplVal == NewReplVal) | |||
2442 | return ChangeStatus::UNCHANGED; | |||
2443 | ||||
2444 | ReplVal = NewReplVal; | |||
2445 | return ChangeStatus::CHANGED; | |||
2446 | } | |||
2447 | ||||
2448 | // Return the value with which associated value can be replaced for specific | |||
2449 | // \p ICV. | |||
2450 | Optional<Value *> | |||
2451 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
2452 | return ReplVal; | |||
2453 | } | |||
2454 | }; | |||
2455 | ||||
2456 | struct AAICVTrackerCallSiteReturned : AAICVTracker { | |||
2457 | AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) | |||
2458 | : AAICVTracker(IRP, A) {} | |||
2459 | ||||
2460 | // FIXME: come up with better string. | |||
2461 | const std::string getAsStr() const override { | |||
2462 | return "ICVTrackerCallSiteReturned"; | |||
2463 | } | |||
2464 | ||||
2465 | // FIXME: come up with some stats. | |||
2466 | void trackStatistics() const override {} | |||
2467 | ||||
2468 | /// We don't manifest anything for this AA. | |||
2469 | ChangeStatus manifest(Attributor &A) override { | |||
2470 | return ChangeStatus::UNCHANGED; | |||
2471 | } | |||
2472 | ||||
2473 | // Map of ICV to their values at specific program point. | |||
2474 | EnumeratedArray<Optional<Value *>, InternalControlVar, | |||
2475 | InternalControlVar::ICV___last> | |||
2476 | ICVReplacementValuesMap; | |||
2477 | ||||
2478 | /// Return the value with which associated value can be replaced for specific | |||
2479 | /// \p ICV. | |||
2480 | Optional<Value *> | |||
2481 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
2482 | return ICVReplacementValuesMap[ICV]; | |||
2483 | } | |||
2484 | ||||
2485 | ChangeStatus updateImpl(Attributor &A) override { | |||
2486 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
2487 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
2488 | *this, IRPosition::returned(*getAssociatedFunction()), | |||
2489 | DepClassTy::REQUIRED); | |||
2490 | ||||
2491 | // We don't have any information, so we assume it changes the ICV. | |||
2492 | if (!ICVTrackingAA.isAssumedTracked()) | |||
2493 | return indicatePessimisticFixpoint(); | |||
2494 | ||||
2495 | for (InternalControlVar ICV : TrackableICVs) { | |||
2496 | Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; | |||
2497 | Optional<Value *> NewReplVal = | |||
2498 | ICVTrackingAA.getUniqueReplacementValue(ICV); | |||
2499 | ||||
2500 | if (ReplVal == NewReplVal) | |||
2501 | continue; | |||
2502 | ||||
2503 | ReplVal = NewReplVal; | |||
2504 | Changed = ChangeStatus::CHANGED; | |||
2505 | } | |||
2506 | return Changed; | |||
2507 | } | |||
2508 | }; | |||
2509 | ||||
2510 | struct AAExecutionDomainFunction : public AAExecutionDomain { | |||
2511 | AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) | |||
2512 | : AAExecutionDomain(IRP, A) {} | |||
2513 | ||||
2514 | const std::string getAsStr() const override { | |||
2515 | return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) + | |||
2516 | "/" + std::to_string(NumBBs) + " BBs thread 0 only."; | |||
2517 | } | |||
2518 | ||||
2519 | /// See AbstractAttribute::trackStatistics(). | |||
2520 | void trackStatistics() const override {} | |||
2521 | ||||
2522 | void initialize(Attributor &A) override { | |||
2523 | Function *F = getAnchorScope(); | |||
2524 | for (const auto &BB : *F) | |||
2525 | SingleThreadedBBs.insert(&BB); | |||
2526 | NumBBs = SingleThreadedBBs.size(); | |||
2527 | } | |||
2528 | ||||
2529 | ChangeStatus manifest(Attributor &A) override { | |||
2530 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs ) dbgs() << TAG << " Basic block @" << getAnchorScope ()->getName() << " " << BB->getName() << " is executed by a single thread.\n"; }; } } while (false) | |||
2531 | for (const BasicBlock *BB : SingleThreadedBBs)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs ) dbgs() << TAG << " Basic block @" << getAnchorScope ()->getName() << " " << BB->getName() << " is executed by a single thread.\n"; }; } } while (false) | |||
2532 | dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs ) dbgs() << TAG << " Basic block @" << getAnchorScope ()->getName() << " " << BB->getName() << " is executed by a single thread.\n"; }; } } while (false) | |||
2533 | << BB->getName() << " is executed by a single thread.\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs ) dbgs() << TAG << " Basic block @" << getAnchorScope ()->getName() << " " << BB->getName() << " is executed by a single thread.\n"; }; } } while (false) | |||
2534 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs ) dbgs() << TAG << " Basic block @" << getAnchorScope ()->getName() << " " << BB->getName() << " is executed by a single thread.\n"; }; } } while (false); | |||
2535 | return ChangeStatus::UNCHANGED; | |||
2536 | } | |||
2537 | ||||
2538 | ChangeStatus updateImpl(Attributor &A) override; | |||
2539 | ||||
2540 | /// Check if an instruction is executed by a single thread. | |||
2541 | bool isExecutedByInitialThreadOnly(const Instruction &I) const override { | |||
2542 | return isExecutedByInitialThreadOnly(*I.getParent()); | |||
2543 | } | |||
2544 | ||||
2545 | bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { | |||
2546 | return isValidState() && SingleThreadedBBs.contains(&BB); | |||
2547 | } | |||
2548 | ||||
2549 | /// Set of basic blocks that are executed by a single thread. | |||
2550 | DenseSet<const BasicBlock *> SingleThreadedBBs; | |||
2551 | ||||
2552 | /// Total number of basic blocks in this function. | |||
2553 | long unsigned NumBBs; | |||
2554 | }; | |||
2555 | ||||
2556 | ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { | |||
2557 | Function *F = getAnchorScope(); | |||
2558 | ReversePostOrderTraversal<Function *> RPOT(F); | |||
2559 | auto NumSingleThreadedBBs = SingleThreadedBBs.size(); | |||
2560 | ||||
2561 | bool AllCallSitesKnown; | |||
2562 | auto PredForCallSite = [&](AbstractCallSite ACS) { | |||
2563 | const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( | |||
2564 | *this, IRPosition::function(*ACS.getInstruction()->getFunction()), | |||
2565 | DepClassTy::REQUIRED); | |||
2566 | return ACS.isDirectCall() && | |||
2567 | ExecutionDomainAA.isExecutedByInitialThreadOnly( | |||
2568 | *ACS.getInstruction()); | |||
2569 | }; | |||
2570 | ||||
2571 | if (!A.checkForAllCallSites(PredForCallSite, *this, | |||
2572 | /* RequiresAllCallSites */ true, | |||
2573 | AllCallSitesKnown)) | |||
2574 | SingleThreadedBBs.erase(&F->getEntryBlock()); | |||
2575 | ||||
2576 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2577 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; | |||
2578 | ||||
2579 | // Check if the edge into the successor block contains a condition that only | |||
2580 | // lets the main thread execute it. | |||
2581 | auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) { | |||
2582 | if (!Edge || !Edge->isConditional()) | |||
2583 | return false; | |||
2584 | if (Edge->getSuccessor(0) != SuccessorBB) | |||
2585 | return false; | |||
2586 | ||||
2587 | auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition()); | |||
2588 | if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality()) | |||
2589 | return false; | |||
2590 | ||||
2591 | ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1)); | |||
2592 | if (!C) | |||
2593 | return false; | |||
2594 | ||||
2595 | // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!) | |||
2596 | if (C->isAllOnesValue()) { | |||
2597 | auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0)); | |||
2598 | CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; | |||
2599 | if (!CB) | |||
2600 | return false; | |||
2601 | const int InitModeArgNo = 1; | |||
2602 | auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo)); | |||
2603 | return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC); | |||
2604 | } | |||
2605 | ||||
2606 | if (C->isZero()) { | |||
2607 | // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x() | |||
2608 | if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0))) | |||
2609 | if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x) | |||
2610 | return true; | |||
2611 | ||||
2612 | // Match: 0 == llvm.amdgcn.workitem.id.x() | |||
2613 | if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0))) | |||
2614 | if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x) | |||
2615 | return true; | |||
2616 | } | |||
2617 | ||||
2618 | return false; | |||
2619 | }; | |||
2620 | ||||
2621 | // Merge all the predecessor states into the current basic block. A basic | |||
2622 | // block is executed by a single thread if all of its predecessors are. | |||
2623 | auto MergePredecessorStates = [&](BasicBlock *BB) { | |||
2624 | if (pred_empty(BB)) | |||
2625 | return SingleThreadedBBs.contains(BB); | |||
2626 | ||||
2627 | bool IsInitialThread = true; | |||
2628 | for (BasicBlock *PredBB : predecessors(BB)) { | |||
2629 | if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()), | |||
2630 | BB)) | |||
2631 | IsInitialThread &= SingleThreadedBBs.contains(PredBB); | |||
2632 | } | |||
2633 | ||||
2634 | return IsInitialThread; | |||
2635 | }; | |||
2636 | ||||
2637 | for (auto *BB : RPOT) { | |||
2638 | if (!MergePredecessorStates(BB)) | |||
2639 | SingleThreadedBBs.erase(BB); | |||
2640 | } | |||
2641 | ||||
2642 | return (NumSingleThreadedBBs == SingleThreadedBBs.size()) | |||
2643 | ? ChangeStatus::UNCHANGED | |||
2644 | : ChangeStatus::CHANGED; | |||
2645 | } | |||
2646 | ||||
2647 | /// Try to replace memory allocation calls called by a single thread with a | |||
2648 | /// static buffer of shared memory. | |||
2649 | struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> { | |||
2650 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | |||
2651 | AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
2652 | ||||
2653 | /// Create an abstract attribute view for the position \p IRP. | |||
2654 | static AAHeapToShared &createForPosition(const IRPosition &IRP, | |||
2655 | Attributor &A); | |||
2656 | ||||
2657 | /// Returns true if HeapToShared conversion is assumed to be possible. | |||
2658 | virtual bool isAssumedHeapToShared(CallBase &CB) const = 0; | |||
2659 | ||||
2660 | /// Returns true if HeapToShared conversion is assumed and the CB is a | |||
2661 | /// callsite to a free operation to be removed. | |||
2662 | virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0; | |||
2663 | ||||
2664 | /// See AbstractAttribute::getName(). | |||
2665 | const std::string getName() const override { return "AAHeapToShared"; } | |||
2666 | ||||
2667 | /// See AbstractAttribute::getIdAddr(). | |||
2668 | const char *getIdAddr() const override { return &ID; } | |||
2669 | ||||
2670 | /// This function should return true if the type of the \p AA is | |||
2671 | /// AAHeapToShared. | |||
2672 | static bool classof(const AbstractAttribute *AA) { | |||
2673 | return (AA->getIdAddr() == &ID); | |||
2674 | } | |||
2675 | ||||
2676 | /// Unique ID (due to the unique address) | |||
2677 | static const char ID; | |||
2678 | }; | |||
2679 | ||||
2680 | struct AAHeapToSharedFunction : public AAHeapToShared { | |||
2681 | AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A) | |||
2682 | : AAHeapToShared(IRP, A) {} | |||
2683 | ||||
2684 | const std::string getAsStr() const override { | |||
2685 | return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) + | |||
2686 | " malloc calls eligible."; | |||
2687 | } | |||
2688 | ||||
2689 | /// See AbstractAttribute::trackStatistics(). | |||
2690 | void trackStatistics() const override {} | |||
2691 | ||||
2692 | /// This functions finds free calls that will be removed by the | |||
2693 | /// HeapToShared transformation. | |||
2694 | void findPotentialRemovedFreeCalls(Attributor &A) { | |||
2695 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2696 | auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; | |||
2697 | ||||
2698 | PotentialRemovedFreeCalls.clear(); | |||
2699 | // Update free call users of found malloc calls. | |||
2700 | for (CallBase *CB : MallocCalls) { | |||
2701 | SmallVector<CallBase *, 4> FreeCalls; | |||
2702 | for (auto *U : CB->users()) { | |||
2703 | CallBase *C = dyn_cast<CallBase>(U); | |||
2704 | if (C && C->getCalledFunction() == FreeRFI.Declaration) | |||
2705 | FreeCalls.push_back(C); | |||
2706 | } | |||
2707 | ||||
2708 | if (FreeCalls.size() != 1) | |||
2709 | continue; | |||
2710 | ||||
2711 | PotentialRemovedFreeCalls.insert(FreeCalls.front()); | |||
2712 | } | |||
2713 | } | |||
2714 | ||||
2715 | void initialize(Attributor &A) override { | |||
2716 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2717 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
2718 | ||||
2719 | for (User *U : RFI.Declaration->users()) | |||
2720 | if (CallBase *CB = dyn_cast<CallBase>(U)) | |||
2721 | MallocCalls.insert(CB); | |||
2722 | ||||
2723 | findPotentialRemovedFreeCalls(A); | |||
2724 | } | |||
2725 | ||||
2726 | bool isAssumedHeapToShared(CallBase &CB) const override { | |||
2727 | return isValidState() && MallocCalls.count(&CB); | |||
2728 | } | |||
2729 | ||||
2730 | bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override { | |||
2731 | return isValidState() && PotentialRemovedFreeCalls.count(&CB); | |||
2732 | } | |||
2733 | ||||
2734 | ChangeStatus manifest(Attributor &A) override { | |||
2735 | if (MallocCalls.empty()) | |||
2736 | return ChangeStatus::UNCHANGED; | |||
2737 | ||||
2738 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2739 | auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; | |||
2740 | ||||
2741 | Function *F = getAnchorScope(); | |||
2742 | auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this, | |||
2743 | DepClassTy::OPTIONAL); | |||
2744 | ||||
2745 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
2746 | for (CallBase *CB : MallocCalls) { | |||
2747 | // Skip replacing this if HeapToStack has already claimed it. | |||
2748 | if (HS && HS->isAssumedHeapToStack(*CB)) | |||
2749 | continue; | |||
2750 | ||||
2751 | // Find the unique free call to remove it. | |||
2752 | SmallVector<CallBase *, 4> FreeCalls; | |||
2753 | for (auto *U : CB->users()) { | |||
2754 | CallBase *C = dyn_cast<CallBase>(U); | |||
2755 | if (C && C->getCalledFunction() == FreeCall.Declaration) | |||
2756 | FreeCalls.push_back(C); | |||
2757 | } | |||
2758 | if (FreeCalls.size() != 1) | |||
2759 | continue; | |||
2760 | ||||
2761 | ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0)); | |||
2762 | ||||
2763 | LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue () << " bytes of shared memory\n"; } } while (false) | |||
2764 | << " with " << AllocSize->getZExtValue()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue () << " bytes of shared memory\n"; } } while (false) | |||
2765 | << " bytes of shared memory\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue () << " bytes of shared memory\n"; } } while (false); | |||
2766 | ||||
2767 | // Create a new shared memory buffer of the same size as the allocation | |||
2768 | // and replace all the uses of the original allocation with it. | |||
2769 | Module *M = CB->getModule(); | |||
2770 | Type *Int8Ty = Type::getInt8Ty(M->getContext()); | |||
2771 | Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); | |||
2772 | auto *SharedMem = new GlobalVariable( | |||
2773 | *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, | |||
2774 | UndefValue::get(Int8ArrTy), CB->getName(), nullptr, | |||
2775 | GlobalValue::NotThreadLocal, | |||
2776 | static_cast<unsigned>(AddressSpace::Shared)); | |||
2777 | auto *NewBuffer = | |||
2778 | ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); | |||
2779 | ||||
2780 | auto Remark = [&](OptimizationRemark OR) { | |||
2781 | return OR << "Replaced globalized variable with " | |||
2782 | << ore::NV("SharedMemory", AllocSize->getZExtValue()) | |||
2783 | << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") | |||
2784 | << "of shared memory."; | |||
2785 | }; | |||
2786 | A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark); | |||
2787 | ||||
2788 | SharedMem->setAlignment(MaybeAlign(32)); | |||
2789 | ||||
2790 | A.changeValueAfterManifest(*CB, *NewBuffer); | |||
2791 | A.deleteAfterManifest(*CB); | |||
2792 | A.deleteAfterManifest(*FreeCalls.front()); | |||
2793 | ||||
2794 | NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); | |||
2795 | Changed = ChangeStatus::CHANGED; | |||
2796 | } | |||
2797 | ||||
2798 | return Changed; | |||
2799 | } | |||
2800 | ||||
2801 | ChangeStatus updateImpl(Attributor &A) override { | |||
2802 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2803 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
2804 | Function *F = getAnchorScope(); | |||
2805 | ||||
2806 | auto NumMallocCalls = MallocCalls.size(); | |||
2807 | ||||
2808 | // Only consider malloc calls executed by a single thread with a constant. | |||
2809 | for (User *U : RFI.Declaration->users()) { | |||
2810 | const auto &ED = A.getAAFor<AAExecutionDomain>( | |||
2811 | *this, IRPosition::function(*F), DepClassTy::REQUIRED); | |||
2812 | if (CallBase *CB = dyn_cast<CallBase>(U)) | |||
2813 | if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) || | |||
2814 | !ED.isExecutedByInitialThreadOnly(*CB)) | |||
2815 | MallocCalls.erase(CB); | |||
2816 | } | |||
2817 | ||||
2818 | findPotentialRemovedFreeCalls(A); | |||
2819 | ||||
2820 | if (NumMallocCalls != MallocCalls.size()) | |||
2821 | return ChangeStatus::CHANGED; | |||
2822 | ||||
2823 | return ChangeStatus::UNCHANGED; | |||
2824 | } | |||
2825 | ||||
2826 | /// Collection of all malloc calls in a function. | |||
2827 | SmallPtrSet<CallBase *, 4> MallocCalls; | |||
2828 | /// Collection of potentially removed free calls in a function. | |||
2829 | SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls; | |||
2830 | }; | |||
2831 | ||||
2832 | struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> { | |||
2833 | using Base = StateWrapper<KernelInfoState, AbstractAttribute>; | |||
2834 | AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
2835 | ||||
2836 | /// Statistics are tracked as part of manifest for now. | |||
2837 | void trackStatistics() const override {} | |||
2838 | ||||
2839 | /// See AbstractAttribute::getAsStr() | |||
2840 | const std::string getAsStr() const override { | |||
2841 | if (!isValidState()) | |||
2842 | return "<invalid>"; | |||
2843 | return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD" | |||
2844 | : "generic") + | |||
2845 | std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]" | |||
2846 | : "") + | |||
2847 | std::string(" #PRs: ") + | |||
2848 | (ReachedKnownParallelRegions.isValidState() | |||
2849 | ? std::to_string(ReachedKnownParallelRegions.size()) | |||
2850 | : "<invalid>") + | |||
2851 | ", #Unknown PRs: " + | |||
2852 | (ReachedUnknownParallelRegions.isValidState() | |||
2853 | ? std::to_string(ReachedUnknownParallelRegions.size()) | |||
2854 | : "<invalid>") + | |||
2855 | ", #Reaching Kernels: " + | |||
2856 | (ReachingKernelEntries.isValidState() | |||
2857 | ? std::to_string(ReachingKernelEntries.size()) | |||
2858 | : "<invalid>"); | |||
2859 | } | |||
2860 | ||||
2861 | /// Create an abstract attribute biew for the position \p IRP. | |||
2862 | static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A); | |||
2863 | ||||
2864 | /// See AbstractAttribute::getName() | |||
2865 | const std::string getName() const override { return "AAKernelInfo"; } | |||
2866 | ||||
2867 | /// See AbstractAttribute::getIdAddr() | |||
2868 | const char *getIdAddr() const override { return &ID; } | |||
2869 | ||||
2870 | /// This function should return true if the type of the \p AA is AAKernelInfo | |||
2871 | static bool classof(const AbstractAttribute *AA) { | |||
2872 | return (AA->getIdAddr() == &ID); | |||
2873 | } | |||
2874 | ||||
2875 | static const char ID; | |||
2876 | }; | |||
2877 | ||||
2878 | /// The function kernel info abstract attribute, basically, what can we say | |||
2879 | /// about a function with regards to the KernelInfoState. | |||
2880 | struct AAKernelInfoFunction : AAKernelInfo { | |||
2881 | AAKernelInfoFunction(const IRPosition &IRP, Attributor &A) | |||
2882 | : AAKernelInfo(IRP, A) {} | |||
2883 | ||||
2884 | SmallPtrSet<Instruction *, 4> GuardedInstructions; | |||
2885 | ||||
2886 | SmallPtrSetImpl<Instruction *> &getGuardedInstructions() { | |||
2887 | return GuardedInstructions; | |||
2888 | } | |||
2889 | ||||
2890 | /// See AbstractAttribute::initialize(...). | |||
2891 | void initialize(Attributor &A) override { | |||
2892 | // This is a high-level transform that might change the constant arguments | |||
2893 | // of the init and dinit calls. We need to tell the Attributor about this | |||
2894 | // to avoid other parts using the current constant value for simpliication. | |||
2895 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
2896 | ||||
2897 | Function *Fn = getAnchorScope(); | |||
2898 | if (!OMPInfoCache.Kernels.count(Fn)) | |||
2899 | return; | |||
2900 | ||||
2901 | // Add itself to the reaching kernel and set IsKernelEntry. | |||
2902 | ReachingKernelEntries.insert(Fn); | |||
2903 | IsKernelEntry = true; | |||
2904 | ||||
2905 | OMPInformationCache::RuntimeFunctionInfo &InitRFI = | |||
2906 | OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; | |||
2907 | OMPInformationCache::RuntimeFunctionInfo &DeinitRFI = | |||
2908 | OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit]; | |||
2909 | ||||
2910 | // For kernels we perform more initialization work, first we find the init | |||
2911 | // and deinit calls. | |||
2912 | auto StoreCallBase = [](Use &U, | |||
2913 | OMPInformationCache::RuntimeFunctionInfo &RFI, | |||
2914 | CallBase *&Storage) { | |||
2915 | CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI); | |||
2916 | assert(CB &&(static_cast <bool> (CB && "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!" ) ? void (0) : __assert_fail ("CB && \"Unexpected use of __kmpc_target_init or __kmpc_target_deinit!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 2917, __extension__ __PRETTY_FUNCTION__)) | |||
2917 | "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!")(static_cast <bool> (CB && "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!" ) ? void (0) : __assert_fail ("CB && \"Unexpected use of __kmpc_target_init or __kmpc_target_deinit!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 2917, __extension__ __PRETTY_FUNCTION__)); | |||
2918 | assert(!Storage &&(static_cast <bool> (!Storage && "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!" ) ? void (0) : __assert_fail ("!Storage && \"Multiple uses of __kmpc_target_init or __kmpc_target_deinit!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 2919, __extension__ __PRETTY_FUNCTION__)) | |||
2919 | "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!")(static_cast <bool> (!Storage && "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!" ) ? void (0) : __assert_fail ("!Storage && \"Multiple uses of __kmpc_target_init or __kmpc_target_deinit!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 2919, __extension__ __PRETTY_FUNCTION__)); | |||
2920 | Storage = CB; | |||
2921 | return false; | |||
2922 | }; | |||
2923 | InitRFI.foreachUse( | |||
2924 | [&](Use &U, Function &) { | |||
2925 | StoreCallBase(U, InitRFI, KernelInitCB); | |||
2926 | return false; | |||
2927 | }, | |||
2928 | Fn); | |||
2929 | DeinitRFI.foreachUse( | |||
2930 | [&](Use &U, Function &) { | |||
2931 | StoreCallBase(U, DeinitRFI, KernelDeinitCB); | |||
2932 | return false; | |||
2933 | }, | |||
2934 | Fn); | |||
2935 | ||||
2936 | // Ignore kernels without initializers such as global constructors. | |||
2937 | if (!KernelInitCB || !KernelDeinitCB) { | |||
2938 | indicateOptimisticFixpoint(); | |||
2939 | return; | |||
2940 | } | |||
2941 | ||||
2942 | // For kernels we might need to initialize/finalize the IsSPMD state and | |||
2943 | // we need to register a simplification callback so that the Attributor | |||
2944 | // knows the constant arguments to __kmpc_target_init and | |||
2945 | // __kmpc_target_deinit might actually change. | |||
2946 | ||||
2947 | Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = | |||
2948 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
2949 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
2950 | // IRP represents the "use generic state machine" argument of an | |||
2951 | // __kmpc_target_init call. We will answer this one with the internal | |||
2952 | // state. As long as we are not in an invalid state, we will create a | |||
2953 | // custom state machine so the value should be a `i1 false`. If we are | |||
2954 | // in an invalid state, we won't change the value that is in the IR. | |||
2955 | if (!ReachedKnownParallelRegions.isValidState()) | |||
2956 | return nullptr; | |||
2957 | // If we have disabled state machine rewrites, don't make a custom one. | |||
2958 | if (DisableOpenMPOptStateMachineRewrite) | |||
2959 | return nullptr; | |||
2960 | if (AA) | |||
2961 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
2962 | UsedAssumedInformation = !isAtFixpoint(); | |||
2963 | auto *FalseVal = | |||
2964 | ConstantInt::getBool(IRP.getAnchorValue().getContext(), 0); | |||
2965 | return FalseVal; | |||
2966 | }; | |||
2967 | ||||
2968 | Attributor::SimplifictionCallbackTy ModeSimplifyCB = | |||
2969 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
2970 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
2971 | // IRP represents the "SPMDCompatibilityTracker" argument of an | |||
2972 | // __kmpc_target_init or | |||
2973 | // __kmpc_target_deinit call. We will answer this one with the internal | |||
2974 | // state. | |||
2975 | if (!SPMDCompatibilityTracker.isValidState()) | |||
2976 | return nullptr; | |||
2977 | if (!SPMDCompatibilityTracker.isAtFixpoint()) { | |||
2978 | if (AA) | |||
2979 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
2980 | UsedAssumedInformation = true; | |||
2981 | } else { | |||
2982 | UsedAssumedInformation = false; | |||
2983 | } | |||
2984 | auto *Val = ConstantInt::getSigned( | |||
2985 | IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()), | |||
2986 | SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD | |||
2987 | : OMP_TGT_EXEC_MODE_GENERIC); | |||
2988 | return Val; | |||
2989 | }; | |||
2990 | ||||
2991 | Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB = | |||
2992 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
2993 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
2994 | // IRP represents the "RequiresFullRuntime" argument of an | |||
2995 | // __kmpc_target_init or __kmpc_target_deinit call. We will answer this | |||
2996 | // one with the internal state of the SPMDCompatibilityTracker, so if | |||
2997 | // generic then true, if SPMD then false. | |||
2998 | if (!SPMDCompatibilityTracker.isValidState()) | |||
2999 | return nullptr; | |||
3000 | if (!SPMDCompatibilityTracker.isAtFixpoint()) { | |||
3001 | if (AA) | |||
3002 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
3003 | UsedAssumedInformation = true; | |||
3004 | } else { | |||
3005 | UsedAssumedInformation = false; | |||
3006 | } | |||
3007 | auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(), | |||
3008 | !SPMDCompatibilityTracker.isAssumed()); | |||
3009 | return Val; | |||
3010 | }; | |||
3011 | ||||
3012 | constexpr const int InitModeArgNo = 1; | |||
3013 | constexpr const int DeinitModeArgNo = 1; | |||
3014 | constexpr const int InitUseStateMachineArgNo = 2; | |||
3015 | constexpr const int InitRequiresFullRuntimeArgNo = 3; | |||
3016 | constexpr const int DeinitRequiresFullRuntimeArgNo = 2; | |||
3017 | A.registerSimplificationCallback( | |||
3018 | IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), | |||
3019 | StateMachineSimplifyCB); | |||
3020 | A.registerSimplificationCallback( | |||
3021 | IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), | |||
3022 | ModeSimplifyCB); | |||
3023 | A.registerSimplificationCallback( | |||
3024 | IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), | |||
3025 | ModeSimplifyCB); | |||
3026 | A.registerSimplificationCallback( | |||
3027 | IRPosition::callsite_argument(*KernelInitCB, | |||
3028 | InitRequiresFullRuntimeArgNo), | |||
3029 | IsGenericModeSimplifyCB); | |||
3030 | A.registerSimplificationCallback( | |||
3031 | IRPosition::callsite_argument(*KernelDeinitCB, | |||
3032 | DeinitRequiresFullRuntimeArgNo), | |||
3033 | IsGenericModeSimplifyCB); | |||
3034 | ||||
3035 | // Check if we know we are in SPMD-mode already. | |||
3036 | ConstantInt *ModeArg = | |||
3037 | dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); | |||
3038 | if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) | |||
3039 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | |||
3040 | // This is a generic region but SPMDization is disabled so stop tracking. | |||
3041 | else if (DisableOpenMPOptSPMDization) | |||
3042 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
3043 | } | |||
3044 | ||||
/// Sanitize the string \p S such that it is a suitable global symbol name.
///
/// Every character that is not an ASCII letter, an ASCII digit, or an
/// underscore is replaced by '.'; all other characters are kept as they are.
/// \returns the sanitized copy of \p S.
static std::string sanitizeForGlobalName(std::string S) {
  for (char &Ch : S) {
    const bool IsLower = Ch >= 'a' && Ch <= 'z';
    const bool IsUpper = Ch >= 'A' && Ch <= 'Z';
    const bool IsDigit = Ch >= '0' && Ch <= '9';
    if (IsLower || IsUpper || IsDigit || Ch == '_')
      continue;
    Ch = '.';
  }
  return S;
}
3056 | ||||
3057 | /// Modify the IR based on the KernelInfoState as the fixpoint iteration is | |||
3058 | /// finished now. | |||
3059 | ChangeStatus manifest(Attributor &A) override { | |||
3060 | // If we are not looking at a kernel with __kmpc_target_init and | |||
3061 | // __kmpc_target_deinit call we cannot actually manifest the information. | |||
3062 | if (!KernelInitCB || !KernelDeinitCB) | |||
3063 | return ChangeStatus::UNCHANGED; | |||
3064 | ||||
3065 | // If we can we change the execution mode to SPMD-mode otherwise we build a | |||
3066 | // custom state machine. | |||
3067 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
3068 | if (!changeToSPMDMode(A, Changed)) | |||
3069 | return buildCustomStateMachine(A); | |||
3070 | ||||
3071 | return Changed; | |||
3072 | } | |||
3073 | ||||
3074 | bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) { | |||
3075 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
3076 | ||||
3077 | if (!SPMDCompatibilityTracker.isAssumed()) { | |||
3078 | for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) { | |||
3079 | if (!NonCompatibleI) | |||
3080 | continue; | |||
3081 | ||||
3082 | // Skip diagnostics on calls to known OpenMP runtime functions for now. | |||
3083 | if (auto *CB = dyn_cast<CallBase>(NonCompatibleI)) | |||
3084 | if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction())) | |||
3085 | continue; | |||
3086 | ||||
3087 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
3088 | ORA << "Value has potential side effects preventing SPMD-mode " | |||
3089 | "execution"; | |||
3090 | if (isa<CallBase>(NonCompatibleI)) { | |||
3091 | ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to " | |||
3092 | "the called function to override"; | |||
3093 | } | |||
3094 | return ORA << "."; | |||
3095 | }; | |||
3096 | A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121", | |||
3097 | Remark); | |||
3098 | ||||
3099 | LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "SPMD-incompatible side-effect: " << *NonCompatibleI << "\n"; } } while (false) | |||
3100 | << *NonCompatibleI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "SPMD-incompatible side-effect: " << *NonCompatibleI << "\n"; } } while (false); | |||
3101 | } | |||
3102 | ||||
3103 | return false; | |||
3104 | } | |||
3105 | ||||
3106 | // Check if the kernel is already in SPMD mode, if so, return success. | |||
3107 | Function *Kernel = getAnchorScope(); | |||
3108 | GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( | |||
3109 | (Kernel->getName() + "_exec_mode").str()); | |||
3110 | assert(ExecMode && "Kernel without exec mode?")(static_cast <bool> (ExecMode && "Kernel without exec mode?" ) ? void (0) : __assert_fail ("ExecMode && \"Kernel without exec mode?\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3110, __extension__ __PRETTY_FUNCTION__)); | |||
3111 | assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!")(static_cast <bool> (ExecMode->getInitializer() && "ExecMode doesn't have initializer!") ? void (0) : __assert_fail ("ExecMode->getInitializer() && \"ExecMode doesn't have initializer!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3111, __extension__ __PRETTY_FUNCTION__)); | |||
3112 | ||||
3113 | // Set the global exec mode flag to indicate SPMD-Generic mode. | |||
3114 | assert(isa<ConstantInt>(ExecMode->getInitializer()) &&(static_cast <bool> (isa<ConstantInt>(ExecMode-> getInitializer()) && "ExecMode is not an integer!") ? void (0) : __assert_fail ("isa<ConstantInt>(ExecMode->getInitializer()) && \"ExecMode is not an integer!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3115, __extension__ __PRETTY_FUNCTION__)) | |||
3115 | "ExecMode is not an integer!")(static_cast <bool> (isa<ConstantInt>(ExecMode-> getInitializer()) && "ExecMode is not an integer!") ? void (0) : __assert_fail ("isa<ConstantInt>(ExecMode->getInitializer()) && \"ExecMode is not an integer!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3115, __extension__ __PRETTY_FUNCTION__)); | |||
3116 | const int8_t ExecModeVal = | |||
3117 | cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue(); | |||
3118 | if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) | |||
3119 | return true; | |||
3120 | ||||
3121 | // We will now unconditionally modify the IR, indicate a change. | |||
3122 | Changed = ChangeStatus::CHANGED; | |||
3123 | ||||
3124 | auto CreateGuardedRegion = [&](Instruction *RegionStartI, | |||
3125 | Instruction *RegionEndI) { | |||
3126 | LoopInfo *LI = nullptr; | |||
3127 | DominatorTree *DT = nullptr; | |||
3128 | MemorySSAUpdater *MSU = nullptr; | |||
3129 | using InsertPointTy = OpenMPIRBuilder::InsertPointTy; | |||
3130 | ||||
3131 | BasicBlock *ParentBB = RegionStartI->getParent(); | |||
3132 | Function *Fn = ParentBB->getParent(); | |||
3133 | Module &M = *Fn->getParent(); | |||
3134 | ||||
3135 | // Create all the blocks and logic. | |||
3136 | // ParentBB: | |||
3137 | // goto RegionCheckTidBB | |||
3138 | // RegionCheckTidBB: | |||
3139 | // Tid = __kmpc_hardware_thread_id() | |||
3140 | // if (Tid != 0) | |||
3141 | // goto RegionBarrierBB | |||
3142 | // RegionStartBB: | |||
3143 | // <execute instructions guarded> | |||
3144 | // goto RegionEndBB | |||
3145 | // RegionEndBB: | |||
3146 | // <store escaping values to shared mem> | |||
3147 | // goto RegionBarrierBB | |||
3148 | // RegionBarrierBB: | |||
3149 | // __kmpc_simple_barrier_spmd() | |||
3150 | // // second barrier is omitted if lacking escaping values. | |||
3151 | // <load escaping values from shared mem> | |||
3152 | // __kmpc_simple_barrier_spmd() | |||
3153 | // goto RegionExitBB | |||
3154 | // RegionExitBB: | |||
3155 | // <execute rest of instructions> | |||
3156 | ||||
3157 | BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(), | |||
3158 | DT, LI, MSU, "region.guarded.end"); | |||
3159 | BasicBlock *RegionBarrierBB = | |||
3160 | SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI, | |||
3161 | MSU, "region.barrier"); | |||
3162 | BasicBlock *RegionExitBB = | |||
3163 | SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(), | |||
3164 | DT, LI, MSU, "region.exit"); | |||
3165 | BasicBlock *RegionStartBB = | |||
3166 | SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded"); | |||
3167 | ||||
3168 | assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&(static_cast <bool> (ParentBB->getUniqueSuccessor() == RegionStartBB && "Expected a different CFG") ? void ( 0) : __assert_fail ("ParentBB->getUniqueSuccessor() == RegionStartBB && \"Expected a different CFG\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3169, __extension__ __PRETTY_FUNCTION__)) | |||
3169 | "Expected a different CFG")(static_cast <bool> (ParentBB->getUniqueSuccessor() == RegionStartBB && "Expected a different CFG") ? void ( 0) : __assert_fail ("ParentBB->getUniqueSuccessor() == RegionStartBB && \"Expected a different CFG\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3169, __extension__ __PRETTY_FUNCTION__)); | |||
3170 | ||||
3171 | BasicBlock *RegionCheckTidBB = SplitBlock( | |||
3172 | ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid"); | |||
3173 | ||||
3174 | // Register basic blocks with the Attributor. | |||
3175 | A.registerManifestAddedBasicBlock(*RegionEndBB); | |||
3176 | A.registerManifestAddedBasicBlock(*RegionBarrierBB); | |||
3177 | A.registerManifestAddedBasicBlock(*RegionExitBB); | |||
3178 | A.registerManifestAddedBasicBlock(*RegionStartBB); | |||
3179 | A.registerManifestAddedBasicBlock(*RegionCheckTidBB); | |||
3180 | ||||
3181 | bool HasBroadcastValues = false; | |||
3182 | // Find escaping outputs from the guarded region to outside users and | |||
3183 | // broadcast their values to them. | |||
3184 | for (Instruction &I : *RegionStartBB) { | |||
3185 | SmallPtrSet<Instruction *, 4> OutsideUsers; | |||
3186 | for (User *Usr : I.users()) { | |||
3187 | Instruction &UsrI = *cast<Instruction>(Usr); | |||
3188 | if (UsrI.getParent() != RegionStartBB) | |||
3189 | OutsideUsers.insert(&UsrI); | |||
3190 | } | |||
3191 | ||||
3192 | if (OutsideUsers.empty()) | |||
3193 | continue; | |||
3194 | ||||
3195 | HasBroadcastValues = true; | |||
3196 | ||||
3197 | // Emit a global variable in shared memory to store the broadcasted | |||
3198 | // value. | |||
3199 | auto *SharedMem = new GlobalVariable( | |||
3200 | M, I.getType(), /* IsConstant */ false, | |||
3201 | GlobalValue::InternalLinkage, UndefValue::get(I.getType()), | |||
3202 | sanitizeForGlobalName( | |||
3203 | (I.getName() + ".guarded.output.alloc").str()), | |||
3204 | nullptr, GlobalValue::NotThreadLocal, | |||
3205 | static_cast<unsigned>(AddressSpace::Shared)); | |||
3206 | ||||
3207 | // Emit a store instruction to update the value. | |||
3208 | new StoreInst(&I, SharedMem, RegionEndBB->getTerminator()); | |||
3209 | ||||
3210 | LoadInst *LoadI = new LoadInst(I.getType(), SharedMem, | |||
3211 | I.getName() + ".guarded.output.load", | |||
3212 | RegionBarrierBB->getTerminator()); | |||
3213 | ||||
3214 | // Emit a load instruction and replace uses of the output value. | |||
3215 | for (Instruction *UsrI : OutsideUsers) | |||
3216 | UsrI->replaceUsesOfWith(&I, LoadI); | |||
3217 | } | |||
3218 | ||||
3219 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
3220 | ||||
3221 | // Go to tid check BB in ParentBB. | |||
3222 | const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); | |||
3223 | ParentBB->getTerminator()->eraseFromParent(); | |||
3224 | OpenMPIRBuilder::LocationDescription Loc( | |||
3225 | InsertPointTy(ParentBB, ParentBB->end()), DL); | |||
3226 | OMPInfoCache.OMPBuilder.updateToLocation(Loc); | |||
3227 | auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc); | |||
3228 | Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr); | |||
3229 | BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); | |||
3230 | ||||
3231 | // Add check for Tid in RegionCheckTidBB | |||
3232 | RegionCheckTidBB->getTerminator()->eraseFromParent(); | |||
3233 | OpenMPIRBuilder::LocationDescription LocRegionCheckTid( | |||
3234 | InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL); | |||
3235 | OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid); | |||
3236 | FunctionCallee HardwareTidFn = | |||
3237 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
3238 | M, OMPRTL___kmpc_get_hardware_thread_id_in_block); | |||
3239 | Value *Tid = | |||
3240 | OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {}); | |||
3241 | Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid); | |||
3242 | OMPInfoCache.OMPBuilder.Builder | |||
3243 | .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB) | |||
3244 | ->setDebugLoc(DL); | |||
3245 | ||||
3246 | // First barrier for synchronization, ensures main thread has updated | |||
3247 | // values. | |||
3248 | FunctionCallee BarrierFn = | |||
3249 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
3250 | M, OMPRTL___kmpc_barrier_simple_spmd); | |||
3251 | OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy( | |||
3252 | RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt())); | |||
3253 | OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid}) | |||
3254 | ->setDebugLoc(DL); | |||
3255 | ||||
3256 | // Second barrier ensures workers have read broadcast values. | |||
3257 | if (HasBroadcastValues) | |||
3258 | CallInst::Create(BarrierFn, {Ident, Tid}, "", | |||
3259 | RegionBarrierBB->getTerminator()) | |||
3260 | ->setDebugLoc(DL); | |||
3261 | }; | |||
3262 | ||||
3263 | auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
3264 | SmallPtrSet<BasicBlock *, 8> Visited; | |||
3265 | for (Instruction *GuardedI : SPMDCompatibilityTracker) { | |||
3266 | BasicBlock *BB = GuardedI->getParent(); | |||
3267 | if (!Visited.insert(BB).second) | |||
3268 | continue; | |||
3269 | ||||
3270 | SmallVector<std::pair<Instruction *, Instruction *>> Reorders; | |||
3271 | Instruction *LastEffect = nullptr; | |||
3272 | BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend(); | |||
3273 | while (++IP != IPEnd) { | |||
3274 | if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory()) | |||
3275 | continue; | |||
3276 | Instruction *I = &*IP; | |||
3277 | if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI)) | |||
3278 | continue; | |||
3279 | if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) { | |||
3280 | LastEffect = nullptr; | |||
3281 | continue; | |||
3282 | } | |||
3283 | if (LastEffect) | |||
3284 | Reorders.push_back({I, LastEffect}); | |||
3285 | LastEffect = &*IP; | |||
3286 | } | |||
3287 | for (auto &Reorder : Reorders) | |||
3288 | Reorder.first->moveBefore(Reorder.second); | |||
3289 | } | |||
3290 | ||||
3291 | SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions; | |||
3292 | ||||
3293 | for (Instruction *GuardedI : SPMDCompatibilityTracker) { | |||
3294 | BasicBlock *BB = GuardedI->getParent(); | |||
3295 | auto *CalleeAA = A.lookupAAFor<AAKernelInfo>( | |||
3296 | IRPosition::function(*GuardedI->getFunction()), nullptr, | |||
3297 | DepClassTy::NONE); | |||
3298 | assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo")(static_cast <bool> (CalleeAA != nullptr && "Expected Callee AAKernelInfo" ) ? void (0) : __assert_fail ("CalleeAA != nullptr && \"Expected Callee AAKernelInfo\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3298, __extension__ __PRETTY_FUNCTION__)); | |||
3299 | auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA); | |||
3300 | // Continue if instruction is already guarded. | |||
3301 | if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI)) | |||
3302 | continue; | |||
3303 | ||||
3304 | Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr; | |||
3305 | for (Instruction &I : *BB) { | |||
3306 | // If instruction I needs to be guarded update the guarded region | |||
3307 | // bounds. | |||
3308 | if (SPMDCompatibilityTracker.contains(&I)) { | |||
3309 | CalleeAAFunction.getGuardedInstructions().insert(&I); | |||
3310 | if (GuardedRegionStart) | |||
3311 | GuardedRegionEnd = &I; | |||
3312 | else | |||
3313 | GuardedRegionStart = GuardedRegionEnd = &I; | |||
3314 | ||||
3315 | continue; | |||
3316 | } | |||
3317 | ||||
3318 | // Instruction I does not need guarding, store | |||
3319 | // any region found and reset bounds. | |||
3320 | if (GuardedRegionStart) { | |||
3321 | GuardedRegions.push_back( | |||
3322 | std::make_pair(GuardedRegionStart, GuardedRegionEnd)); | |||
3323 | GuardedRegionStart = nullptr; | |||
3324 | GuardedRegionEnd = nullptr; | |||
3325 | } | |||
3326 | } | |||
3327 | } | |||
3328 | ||||
3329 | for (auto &GR : GuardedRegions) | |||
3330 | CreateGuardedRegion(GR.first, GR.second); | |||
3331 | ||||
3332 | // Adjust the global exec mode flag that tells the runtime what mode this | |||
3333 | // kernel is executed in. | |||
3334 | assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&(static_cast <bool> (ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!") ? void (0) : __assert_fail ("ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && \"Initially non-SPMD kernel has SPMD exec mode!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3335, __extension__ __PRETTY_FUNCTION__)) | |||
3335 | "Initially non-SPMD kernel has SPMD exec mode!")(static_cast <bool> (ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!") ? void (0) : __assert_fail ("ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && \"Initially non-SPMD kernel has SPMD exec mode!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3335, __extension__ __PRETTY_FUNCTION__)); | |||
3336 | ExecMode->setInitializer( | |||
3337 | ConstantInt::get(ExecMode->getInitializer()->getType(), | |||
3338 | ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); | |||
3339 | ||||
3340 | // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. | |||
3341 | const int InitModeArgNo = 1; | |||
3342 | const int DeinitModeArgNo = 1; | |||
3343 | const int InitUseStateMachineArgNo = 2; | |||
3344 | const int InitRequiresFullRuntimeArgNo = 3; | |||
3345 | const int DeinitRequiresFullRuntimeArgNo = 2; | |||
3346 | ||||
3347 | auto &Ctx = getAnchorValue().getContext(); | |||
3348 | A.changeUseAfterManifest( | |||
3349 | KernelInitCB->getArgOperandUse(InitModeArgNo), | |||
3350 | *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), | |||
3351 | OMP_TGT_EXEC_MODE_SPMD)); | |||
3352 | A.changeUseAfterManifest( | |||
3353 | KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), | |||
3354 | *ConstantInt::getBool(Ctx, 0)); | |||
3355 | A.changeUseAfterManifest( | |||
3356 | KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), | |||
3357 | *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), | |||
3358 | OMP_TGT_EXEC_MODE_SPMD)); | |||
3359 | A.changeUseAfterManifest( | |||
3360 | KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo), | |||
3361 | *ConstantInt::getBool(Ctx, 0)); | |||
3362 | A.changeUseAfterManifest( | |||
3363 | KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo), | |||
3364 | *ConstantInt::getBool(Ctx, 0)); | |||
3365 | ||||
3366 | ++NumOpenMPTargetRegionKernelsSPMD; | |||
3367 | ||||
3368 | auto Remark = [&](OptimizationRemark OR) { | |||
3369 | return OR << "Transformed generic-mode kernel to SPMD-mode."; | |||
3370 | }; | |||
3371 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark); | |||
3372 | return true; | |||
3373 | }; | |||
3374 | ||||
3375 | ChangeStatus buildCustomStateMachine(Attributor &A) { | |||
3376 | // If we have disabled state machine rewrites, don't make a custom one | |||
3377 | if (DisableOpenMPOptStateMachineRewrite) | |||
3378 | return ChangeStatus::UNCHANGED; | |||
3379 | ||||
3380 | // Don't rewrite the state machine if we are not in a valid state. | |||
3381 | if (!ReachedKnownParallelRegions.isValidState()) | |||
3382 | return ChangeStatus::UNCHANGED; | |||
3383 | ||||
3384 | const int InitModeArgNo = 1; | |||
3385 | const int InitUseStateMachineArgNo = 2; | |||
3386 | ||||
3387 | // Check if the current configuration is non-SPMD and generic state machine. | |||
3388 | // If we already have SPMD mode or a custom state machine we do not need to | |||
3389 | // go any further. If it is anything but a constant something is weird and | |||
3390 | // we give up. | |||
3391 | ConstantInt *UseStateMachine = dyn_cast<ConstantInt>( | |||
3392 | KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); | |||
3393 | ConstantInt *Mode = | |||
3394 | dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); | |||
3395 | ||||
3396 | // If we are stuck with generic mode, try to create a custom device (=GPU) | |||
3397 | // state machine which is specialized for the parallel regions that are | |||
3398 | // reachable by the kernel. | |||
3399 | if (!UseStateMachine || UseStateMachine->isZero() || !Mode || | |||
3400 | (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) | |||
3401 | return ChangeStatus::UNCHANGED; | |||
3402 | ||||
3403 | // If not SPMD mode, indicate we use a custom state machine now. | |||
3404 | auto &Ctx = getAnchorValue().getContext(); | |||
3405 | auto *FalseVal = ConstantInt::getBool(Ctx, 0); | |||
3406 | A.changeUseAfterManifest( | |||
3407 | KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); | |||
3408 | ||||
3409 | // If we don't actually need a state machine we are done here. This can | |||
3410 | // happen if there simply are no parallel regions. In the resulting kernel | |||
3411 | // all worker threads will simply exit right away, leaving the main thread | |||
3412 | // to do the work alone. | |||
3413 | if (!mayContainParallelRegion()) { | |||
3414 | ++NumOpenMPTargetRegionKernelsWithoutStateMachine; | |||
3415 | ||||
3416 | auto Remark = [&](OptimizationRemark OR) { | |||
3417 | return OR << "Removing unused state machine from generic-mode kernel."; | |||
3418 | }; | |||
3419 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark); | |||
3420 | ||||
3421 | return ChangeStatus::CHANGED; | |||
3422 | } | |||
3423 | ||||
3424 | // Keep track in the statistics of our new shiny custom state machine. | |||
3425 | if (ReachedUnknownParallelRegions.empty()) { | |||
3426 | ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback; | |||
3427 | ||||
3428 | auto Remark = [&](OptimizationRemark OR) { | |||
3429 | return OR << "Rewriting generic-mode kernel with a customized state " | |||
3430 | "machine."; | |||
3431 | }; | |||
3432 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark); | |||
3433 | } else { | |||
3434 | ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback; | |||
3435 | ||||
3436 | auto Remark = [&](OptimizationRemarkAnalysis OR) { | |||
3437 | return OR << "Generic-mode kernel is executed with a customized state " | |||
3438 | "machine that requires a fallback."; | |||
3439 | }; | |||
3440 | A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark); | |||
3441 | ||||
3442 | // Tell the user why we ended up with a fallback. | |||
3443 | for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) { | |||
3444 | if (!UnknownParallelRegionCB) | |||
3445 | continue; | |||
3446 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
3447 | return ORA << "Call may contain unknown parallel regions. Use " | |||
3448 | << "`__attribute__((assume(\"omp_no_parallelism\")))` to " | |||
3449 | "override."; | |||
3450 | }; | |||
3451 | A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB, | |||
3452 | "OMP133", Remark); | |||
3453 | } | |||
3454 | } | |||
3455 | ||||
3456 | // Create all the blocks: | |||
3457 | // | |||
3458 | // InitCB = __kmpc_target_init(...) | |||
3459 | // bool IsWorker = InitCB >= 0; | |||
3460 | // if (IsWorker) { | |||
3461 | // SMBeginBB: __kmpc_barrier_simple_generic(...); | |||
3462 | // void *WorkFn; | |||
3463 | // bool Active = __kmpc_kernel_parallel(&WorkFn); | |||
3464 | // if (!WorkFn) return; | |||
3465 | // SMIsActiveCheckBB: if (Active) { | |||
3466 | // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>) | |||
3467 | // ParFn0(...); | |||
3468 | // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>) | |||
3469 | // ParFn1(...); | |||
3470 | // ... | |||
3471 | // SMIfCascadeCurrentBB: else | |||
3472 | // ((WorkFnTy*)WorkFn)(...); | |||
3473 | // SMEndParallelBB: __kmpc_kernel_end_parallel(...); | |||
3474 | // } | |||
3475 | // SMDoneBB: __kmpc_barrier_simple_generic(...); | |||
3476 | // goto SMBeginBB; | |||
3477 | // } | |||
3478 | // UserCodeEntryBB: // user code | |||
3479 | // __kmpc_target_deinit(...) | |||
3480 | // | |||
3481 | Function *Kernel = getAssociatedFunction(); | |||
3482 | assert(Kernel && "Expected an associated function!")(static_cast <bool> (Kernel && "Expected an associated function!" ) ? void (0) : __assert_fail ("Kernel && \"Expected an associated function!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3482, __extension__ __PRETTY_FUNCTION__)); | |||
3483 | ||||
3484 | BasicBlock *InitBB = KernelInitCB->getParent(); | |||
3485 | BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock( | |||
3486 | KernelInitCB->getNextNode(), "thread.user_code.check"); | |||
3487 | BasicBlock *StateMachineBeginBB = BasicBlock::Create( | |||
3488 | Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB); | |||
3489 | BasicBlock *StateMachineFinishedBB = BasicBlock::Create( | |||
3490 | Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB); | |||
3491 | BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create( | |||
3492 | Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB); | |||
3493 | BasicBlock *StateMachineIfCascadeCurrentBB = | |||
3494 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", | |||
3495 | Kernel, UserCodeEntryBB); | |||
3496 | BasicBlock *StateMachineEndParallelBB = | |||
3497 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end", | |||
3498 | Kernel, UserCodeEntryBB); | |||
3499 | BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create( | |||
3500 | Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB); | |||
3501 | A.registerManifestAddedBasicBlock(*InitBB); | |||
3502 | A.registerManifestAddedBasicBlock(*UserCodeEntryBB); | |||
3503 | A.registerManifestAddedBasicBlock(*StateMachineBeginBB); | |||
3504 | A.registerManifestAddedBasicBlock(*StateMachineFinishedBB); | |||
3505 | A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB); | |||
3506 | A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB); | |||
3507 | A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB); | |||
3508 | A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB); | |||
3509 | ||||
3510 | const DebugLoc &DLoc = KernelInitCB->getDebugLoc(); | |||
3511 | ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); | |||
3512 | ||||
3513 | InitBB->getTerminator()->eraseFromParent(); | |||
3514 | Instruction *IsWorker = | |||
3515 | ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, | |||
3516 | ConstantInt::get(KernelInitCB->getType(), -1), | |||
3517 | "thread.is_worker", InitBB); | |||
3518 | IsWorker->setDebugLoc(DLoc); | |||
3519 | BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB); | |||
3520 | ||||
3521 | Module &M = *Kernel->getParent(); | |||
3522 | ||||
3523 | // Create local storage for the work function pointer. | |||
3524 | const DataLayout &DL = M.getDataLayout(); | |||
3525 | Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); | |||
3526 | Instruction *WorkFnAI = | |||
3527 | new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, | |||
3528 | "worker.work_fn.addr", &Kernel->getEntryBlock().front()); | |||
3529 | WorkFnAI->setDebugLoc(DLoc); | |||
3530 | ||||
3531 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
3532 | OMPInfoCache.OMPBuilder.updateToLocation( | |||
3533 | OpenMPIRBuilder::LocationDescription( | |||
3534 | IRBuilder<>::InsertPoint(StateMachineBeginBB, | |||
3535 | StateMachineBeginBB->end()), | |||
3536 | DLoc)); | |||
3537 | ||||
3538 | Value *Ident = KernelInitCB->getArgOperand(0); | |||
3539 | Value *GTid = KernelInitCB; | |||
3540 | ||||
3541 | FunctionCallee BarrierFn = | |||
3542 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
3543 | M, OMPRTL___kmpc_barrier_simple_generic); | |||
3544 | CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB) | |||
3545 | ->setDebugLoc(DLoc); | |||
3546 | ||||
3547 | if (WorkFnAI->getType()->getPointerAddressSpace() != | |||
3548 | (unsigned int)AddressSpace::Generic) { | |||
3549 | WorkFnAI = new AddrSpaceCastInst( | |||
3550 | WorkFnAI, | |||
3551 | PointerType::getWithSamePointeeType( | |||
3552 | cast<PointerType>(WorkFnAI->getType()), | |||
3553 | (unsigned int)AddressSpace::Generic), | |||
3554 | WorkFnAI->getName() + ".generic", StateMachineBeginBB); | |||
3555 | WorkFnAI->setDebugLoc(DLoc); | |||
3556 | } | |||
3557 | ||||
3558 | FunctionCallee KernelParallelFn = | |||
3559 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
3560 | M, OMPRTL___kmpc_kernel_parallel); | |||
3561 | Instruction *IsActiveWorker = CallInst::Create( | |||
3562 | KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB); | |||
3563 | IsActiveWorker->setDebugLoc(DLoc); | |||
3564 | Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn", | |||
3565 | StateMachineBeginBB); | |||
3566 | WorkFn->setDebugLoc(DLoc); | |||
3567 | ||||
3568 | FunctionType *ParallelRegionFnTy = FunctionType::get( | |||
3569 | Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)}, | |||
3570 | false); | |||
3571 | Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( | |||
3572 | WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast", | |||
3573 | StateMachineBeginBB); | |||
3574 | ||||
3575 | Instruction *IsDone = | |||
3576 | ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, | |||
3577 | Constant::getNullValue(VoidPtrTy), "worker.is_done", | |||
3578 | StateMachineBeginBB); | |||
3579 | IsDone->setDebugLoc(DLoc); | |||
3580 | BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB, | |||
3581 | IsDone, StateMachineBeginBB) | |||
3582 | ->setDebugLoc(DLoc); | |||
3583 | ||||
3584 | BranchInst::Create(StateMachineIfCascadeCurrentBB, | |||
3585 | StateMachineDoneBarrierBB, IsActiveWorker, | |||
3586 | StateMachineIsActiveCheckBB) | |||
3587 | ->setDebugLoc(DLoc); | |||
3588 | ||||
3589 | Value *ZeroArg = | |||
3590 | Constant::getNullValue(ParallelRegionFnTy->getParamType(0)); | |||
3591 | ||||
3592 | // Now that we have most of the CFG skeleton it is time for the if-cascade | |||
3593 | // that checks the function pointer we got from the runtime against the | |||
3594 | // parallel regions we expect, if there are any. | |||
3595 | for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) { | |||
3596 | auto *ParallelRegion = ReachedKnownParallelRegions[I]; | |||
3597 | BasicBlock *PRExecuteBB = BasicBlock::Create( | |||
3598 | Ctx, "worker_state_machine.parallel_region.execute", Kernel, | |||
3599 | StateMachineEndParallelBB); | |||
3600 | CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB) | |||
3601 | ->setDebugLoc(DLoc); | |||
3602 | BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB) | |||
3603 | ->setDebugLoc(DLoc); | |||
3604 | ||||
3605 | BasicBlock *PRNextBB = | |||
3606 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", | |||
3607 | Kernel, StateMachineEndParallelBB); | |||
3608 | ||||
3609 | // Check if we need to compare the pointer at all or if we can just | |||
3610 | // call the parallel region function. | |||
3611 | Value *IsPR; | |||
3612 | if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) { | |||
3613 | Instruction *CmpI = ICmpInst::Create( | |||
3614 | ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion, | |||
3615 | "worker.check_parallel_region", StateMachineIfCascadeCurrentBB); | |||
3616 | CmpI->setDebugLoc(DLoc); | |||
3617 | IsPR = CmpI; | |||
3618 | } else { | |||
3619 | IsPR = ConstantInt::getTrue(Ctx); | |||
3620 | } | |||
3621 | ||||
3622 | BranchInst::Create(PRExecuteBB, PRNextBB, IsPR, | |||
3623 | StateMachineIfCascadeCurrentBB) | |||
3624 | ->setDebugLoc(DLoc); | |||
3625 | StateMachineIfCascadeCurrentBB = PRNextBB; | |||
3626 | } | |||
3627 | ||||
3628 | // At the end of the if-cascade we place the indirect function pointer call | |||
3629 | // in case we might need it, that is if there can be parallel regions we | |||
3630 | // have not handled in the if-cascade above. | |||
3631 | if (!ReachedUnknownParallelRegions.empty()) { | |||
3632 | StateMachineIfCascadeCurrentBB->setName( | |||
3633 | "worker_state_machine.parallel_region.fallback.execute"); | |||
3634 | CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "", | |||
3635 | StateMachineIfCascadeCurrentBB) | |||
3636 | ->setDebugLoc(DLoc); | |||
3637 | } | |||
3638 | BranchInst::Create(StateMachineEndParallelBB, | |||
3639 | StateMachineIfCascadeCurrentBB) | |||
3640 | ->setDebugLoc(DLoc); | |||
3641 | ||||
3642 | CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
3643 | M, OMPRTL___kmpc_kernel_end_parallel), | |||
3644 | {}, "", StateMachineEndParallelBB) | |||
3645 | ->setDebugLoc(DLoc); | |||
3646 | BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB) | |||
3647 | ->setDebugLoc(DLoc); | |||
3648 | ||||
3649 | CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB) | |||
3650 | ->setDebugLoc(DLoc); | |||
3651 | BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB) | |||
3652 | ->setDebugLoc(DLoc); | |||
3653 | ||||
3654 | return ChangeStatus::CHANGED; | |||
3655 | } | |||
3656 | ||||
3657 | /// Fixpoint iteration update function. Will be called every time a dependence | |||
3658 | /// changed its state (and in the beginning). | |||
3659 | ChangeStatus updateImpl(Attributor &A) override { | |||
3660 | KernelInfoState StateBefore = getState(); | |||
3661 | ||||
3662 | // Callback to check a read/write instruction. | |||
3663 | auto CheckRWInst = [&](Instruction &I) { | |||
3664 | // We handle calls later. | |||
3665 | if (isa<CallBase>(I)) | |||
3666 | return true; | |||
3667 | // We only care about write effects. | |||
3668 | if (!I.mayWriteToMemory()) | |||
3669 | return true; | |||
3670 | if (auto *SI = dyn_cast<StoreInst>(&I)) { | |||
3671 | SmallVector<const Value *> Objects; | |||
3672 | getUnderlyingObjects(SI->getPointerOperand(), Objects); | |||
3673 | if (llvm::all_of(Objects, | |||
3674 | [](const Value *Obj) { return isa<AllocaInst>(Obj); })) | |||
3675 | return true; | |||
3676 | // Check for AAHeapToStack moved objects which must not be guarded. | |||
3677 | auto &HS = A.getAAFor<AAHeapToStack>( | |||
3678 | *this, IRPosition::function(*I.getFunction()), | |||
3679 | DepClassTy::OPTIONAL); | |||
3680 | if (llvm::all_of(Objects, [&HS](const Value *Obj) { | |||
3681 | auto *CB = dyn_cast<CallBase>(Obj); | |||
3682 | if (!CB) | |||
3683 | return false; | |||
3684 | return HS.isAssumedHeapToStack(*CB); | |||
3685 | })) { | |||
3686 | return true; | |||
3687 | } | |||
3688 | } | |||
3689 | ||||
3690 | // Insert instruction that needs guarding. | |||
3691 | SPMDCompatibilityTracker.insert(&I); | |||
3692 | return true; | |||
3693 | }; | |||
3694 | ||||
3695 | bool UsedAssumedInformationInCheckRWInst = false; | |||
3696 | if (!SPMDCompatibilityTracker.isAtFixpoint()) | |||
3697 | if (!A.checkForAllReadWriteInstructions( | |||
3698 | CheckRWInst, *this, UsedAssumedInformationInCheckRWInst)) | |||
3699 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
3700 | ||||
3701 | if (!IsKernelEntry) { | |||
3702 | updateReachingKernelEntries(A); | |||
3703 | updateParallelLevels(A); | |||
3704 | ||||
3705 | if (!ParallelLevels.isValidState()) | |||
3706 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
3707 | } | |||
3708 | ||||
3709 | // Callback to check a call instruction. | |||
3710 | bool AllParallelRegionStatesWereFixed = true; | |||
3711 | bool AllSPMDStatesWereFixed = true; | |||
3712 | auto CheckCallInst = [&](Instruction &I) { | |||
3713 | auto &CB = cast<CallBase>(I); | |||
3714 | auto &CBAA = A.getAAFor<AAKernelInfo>( | |||
3715 | *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); | |||
3716 | getState() ^= CBAA.getState(); | |||
3717 | AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint(); | |||
3718 | AllParallelRegionStatesWereFixed &= | |||
3719 | CBAA.ReachedKnownParallelRegions.isAtFixpoint(); | |||
3720 | AllParallelRegionStatesWereFixed &= | |||
3721 | CBAA.ReachedUnknownParallelRegions.isAtFixpoint(); | |||
3722 | return true; | |||
3723 | }; | |||
3724 | ||||
3725 | bool UsedAssumedInformationInCheckCallInst = false; | |||
3726 | if (!A.checkForAllCallLikeInstructions( | |||
3727 | CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) { | |||
3728 | LLVM_DEBUG(dbgs() << TAGdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Failed to visit all call-like instructions!\n" ;; } } while (false) | |||
3729 | << "Failed to visit all call-like instructions!\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Failed to visit all call-like instructions!\n" ;; } } while (false); | |||
3730 | return indicatePessimisticFixpoint(); | |||
3731 | } | |||
3732 | ||||
3733 | // If we haven't used any assumed information for the reached parallel | |||
3734 | // region states we can fix it. | |||
3735 | if (!UsedAssumedInformationInCheckCallInst && | |||
3736 | AllParallelRegionStatesWereFixed) { | |||
3737 | ReachedKnownParallelRegions.indicateOptimisticFixpoint(); | |||
3738 | ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); | |||
3739 | } | |||
3740 | ||||
3741 | // If we are sure there are no parallel regions in the kernel we do not | |||
3742 | // want SPMD mode. | |||
3743 | if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() && | |||
3744 | ReachedKnownParallelRegions.isAtFixpoint() && | |||
3745 | ReachedUnknownParallelRegions.isValidState() && | |||
3746 | ReachedKnownParallelRegions.isValidState() && | |||
3747 | !mayContainParallelRegion()) | |||
3748 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
3749 | ||||
3750 | // If we haven't used any assumed information for the SPMD state we can fix | |||
3751 | // it. | |||
3752 | if (!UsedAssumedInformationInCheckRWInst && | |||
3753 | !UsedAssumedInformationInCheckCallInst && AllSPMDStatesWereFixed) | |||
3754 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | |||
3755 | ||||
3756 | return StateBefore == getState() ? ChangeStatus::UNCHANGED | |||
3757 | : ChangeStatus::CHANGED; | |||
3758 | } | |||
3759 | ||||
3760 | private: | |||
3761 | /// Update info regarding reaching kernels. | |||
3762 | void updateReachingKernelEntries(Attributor &A) { | |||
3763 | auto PredCallSite = [&](AbstractCallSite ACS) { | |||
3764 | Function *Caller = ACS.getInstruction()->getFunction(); | |||
3765 | ||||
3766 | assert(Caller && "Caller is nullptr")(static_cast <bool> (Caller && "Caller is nullptr" ) ? void (0) : __assert_fail ("Caller && \"Caller is nullptr\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3766, __extension__ __PRETTY_FUNCTION__)); | |||
| ||||
3767 | ||||
3768 | auto &CAA = A.getOrCreateAAFor<AAKernelInfo>( | |||
3769 | IRPosition::function(*Caller), this, DepClassTy::REQUIRED); | |||
3770 | if (CAA.ReachingKernelEntries.isValidState()) { | |||
3771 | ReachingKernelEntries ^= CAA.ReachingKernelEntries; | |||
| ||||
3772 | return true; | |||
3773 | } | |||
3774 | ||||
3775 | // We lost track of the caller of the associated function, any kernel | |||
3776 | // could reach now. | |||
3777 | ReachingKernelEntries.indicatePessimisticFixpoint(); | |||
3778 | ||||
3779 | return true; | |||
3780 | }; | |||
3781 | ||||
3782 | bool AllCallSitesKnown; | |||
3783 | if (!A.checkForAllCallSites(PredCallSite, *this, | |||
3784 | true /* RequireAllCallSites */, | |||
3785 | AllCallSitesKnown)) | |||
3786 | ReachingKernelEntries.indicatePessimisticFixpoint(); | |||
3787 | } | |||
3788 | ||||
3789 | /// Update info regarding parallel levels. | |||
3790 | void updateParallelLevels(Attributor &A) { | |||
3791 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
3792 | OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI = | |||
3793 | OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; | |||
3794 | ||||
3795 | auto PredCallSite = [&](AbstractCallSite ACS) { | |||
3796 | Function *Caller = ACS.getInstruction()->getFunction(); | |||
3797 | ||||
3798 | assert(Caller && "Caller is nullptr")(static_cast <bool> (Caller && "Caller is nullptr" ) ? void (0) : __assert_fail ("Caller && \"Caller is nullptr\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 3798, __extension__ __PRETTY_FUNCTION__)); | |||
3799 | ||||
3800 | auto &CAA = | |||
3801 | A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller)); | |||
3802 | if (CAA.ParallelLevels.isValidState()) { | |||
3803 | // Any function that is called by `__kmpc_parallel_51` will not be | |||
3804 | // folded as the parallel level in the function is updated. In order to | |||
3805 | // get it right, all the analysis would depend on the implentation. That | |||
3806 | // said, if in the future any change to the implementation, the analysis | |||
3807 | // could be wrong. As a consequence, we are just conservative here. | |||
3808 | if (Caller == Parallel51RFI.Declaration) { | |||
3809 | ParallelLevels.indicatePessimisticFixpoint(); | |||
3810 | return true; | |||
3811 | } | |||
3812 | ||||
3813 | ParallelLevels ^= CAA.ParallelLevels; | |||
3814 | ||||
3815 | return true; | |||
3816 | } | |||
3817 | ||||
3818 | // We lost track of the caller of the associated function, any kernel | |||
3819 | // could reach now. | |||
3820 | ParallelLevels.indicatePessimisticFixpoint(); | |||
3821 | ||||
3822 | return true; | |||
3823 | }; | |||
3824 | ||||
3825 | bool AllCallSitesKnown = true; | |||
3826 | if (!A.checkForAllCallSites(PredCallSite, *this, | |||
3827 | true /* RequireAllCallSites */, | |||
3828 | AllCallSitesKnown)) | |||
3829 | ParallelLevels.indicatePessimisticFixpoint(); | |||
3830 | } | |||
3831 | }; | |||
3832 | ||||
3833 | /// The call site kernel info abstract attribute, basically, what can we say | |||
3834 | /// about a call site with regards to the KernelInfoState. For now this simply | |||
3835 | /// forwards the information from the callee. | |||
3836 | struct AAKernelInfoCallSite : AAKernelInfo { | |||
3837 | AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A) | |||
3838 | : AAKernelInfo(IRP, A) {} | |||
3839 | ||||
3840 | /// See AbstractAttribute::initialize(...). | |||
3841 | void initialize(Attributor &A) override { | |||
3842 | AAKernelInfo::initialize(A); | |||
3843 | ||||
3844 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
3845 | Function *Callee = getAssociatedFunction(); | |||
3846 | ||||
3847 | auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>( | |||
3848 | *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); | |||
3849 | ||||
3850 | // Check for SPMD-mode assumptions. | |||
3851 | if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) { | |||
3852 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | |||
3853 | indicateOptimisticFixpoint(); | |||
3854 | } | |||
3855 | ||||
3856 | // First weed out calls we do not care about, that is readonly/readnone | |||
3857 | // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a | |||
3858 | // parallel region or anything else we are looking for. | |||
3859 | if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) { | |||
3860 | indicateOptimisticFixpoint(); | |||
3861 | return; | |||
3862 | } | |||
3863 | ||||
3864 | // Next we check if we know the callee. If it is a known OpenMP function | |||
3865 | // we will handle them explicitly in the switch below. If it is not, we | |||
3866 | // will use an AAKernelInfo object on the callee to gather information and | |||
3867 | // merge that into the current state. The latter happens in the updateImpl. | |||
3868 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
3869 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); | |||
3870 | if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { | |||
3871 | // Unknown caller or declarations are not analyzable, we give up. | |||
3872 | if (!Callee || !A.isFunctionIPOAmendable(*Callee)) { | |||
3873 | ||||
3874 | // Unknown callees might contain parallel regions, except if they have | |||
3875 | // an appropriate assumption attached. | |||
3876 | if (!(AssumptionAA.hasAssumption("omp_no_openmp") || | |||
3877 | AssumptionAA.hasAssumption("omp_no_parallelism"))) | |||
3878 | ReachedUnknownParallelRegions.insert(&CB); | |||
3879 | ||||
3880 | // If SPMDCompatibilityTracker is not fixed, we need to give up on the | |||
3881 | // idea we can run something unknown in SPMD-mode. | |||
3882 | if (!SPMDCompatibilityTracker.isAtFixpoint()) { | |||
3883 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
3884 | SPMDCompatibilityTracker.insert(&CB); | |||
3885 | } | |||
3886 | ||||
3887 | // We have updated the state for this unknown call properly, there won't | |||
3888 | // be any change so we indicate a fixpoint. | |||
3889 | indicateOptimisticFixpoint(); | |||
3890 | } | |||
3891 | // If the callee is known and can be used in IPO, we will update the state | |||
3892 | // based on the callee state in updateImpl. | |||
3893 | return; | |||
3894 | } | |||
3895 | ||||
3896 | const unsigned int WrapperFunctionArgNo = 6; | |||
3897 | RuntimeFunction RF = It->getSecond(); | |||
3898 | switch (RF) { | |||
3899 | // All the functions we know are compatible with SPMD mode. | |||
3900 | case OMPRTL___kmpc_is_spmd_exec_mode: | |||
3901 | case OMPRTL___kmpc_distribute_static_fini: | |||
3902 | case OMPRTL___kmpc_for_static_fini: | |||
3903 | case OMPRTL___kmpc_global_thread_num: | |||
3904 | case OMPRTL___kmpc_get_hardware_num_threads_in_block: | |||
3905 | case OMPRTL___kmpc_get_hardware_num_blocks: | |||
3906 | case OMPRTL___kmpc_single: | |||
3907 | case OMPRTL___kmpc_end_single: | |||
3908 | case OMPRTL___kmpc_master: | |||
3909 | case OMPRTL___kmpc_end_master: | |||
3910 | case OMPRTL___kmpc_barrier: | |||
3911 | break; | |||
3912 | case OMPRTL___kmpc_distribute_static_init_4: | |||
3913 | case OMPRTL___kmpc_distribute_static_init_4u: | |||
3914 | case OMPRTL___kmpc_distribute_static_init_8: | |||
3915 | case OMPRTL___kmpc_distribute_static_init_8u: | |||
3916 | case OMPRTL___kmpc_for_static_init_4: | |||
3917 | case OMPRTL___kmpc_for_static_init_4u: | |||
3918 | case OMPRTL___kmpc_for_static_init_8: | |||
3919 | case OMPRTL___kmpc_for_static_init_8u: { | |||
3920 | // Check the schedule and allow static schedule in SPMD mode. | |||
3921 | unsigned ScheduleArgOpNo = 2; | |||
3922 | auto *ScheduleTypeCI = | |||
3923 | dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo)); | |||
3924 | unsigned ScheduleTypeVal = | |||
3925 | ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0; | |||
3926 | switch (OMPScheduleType(ScheduleTypeVal)) { | |||
3927 | case OMPScheduleType::Static: | |||
3928 | case OMPScheduleType::StaticChunked: | |||
3929 | case OMPScheduleType::Distribute: | |||
3930 | case OMPScheduleType::DistributeChunked: | |||
3931 | break; | |||
3932 | default: | |||
3933 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
3934 | SPMDCompatibilityTracker.insert(&CB); | |||
3935 | break; | |||
3936 | }; | |||
3937 | } break; | |||
3938 | case OMPRTL___kmpc_target_init: | |||
3939 | KernelInitCB = &CB; | |||
3940 | break; | |||
3941 | case OMPRTL___kmpc_target_deinit: | |||
3942 | KernelDeinitCB = &CB; | |||
3943 | break; | |||
3944 | case OMPRTL___kmpc_parallel_51: | |||
3945 | if (auto *ParallelRegion = dyn_cast<Function>( | |||
3946 | CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) { | |||
3947 | ReachedKnownParallelRegions.insert(ParallelRegion); | |||
3948 | break; | |||
3949 | } | |||
3950 | // The condition above should usually get the parallel region function | |||
3951 | // pointer and record it. In the off chance it doesn't we assume the | |||
3952 | // worst. | |||
3953 | ReachedUnknownParallelRegions.insert(&CB); | |||
3954 | break; | |||
3955 | case OMPRTL___kmpc_omp_task: | |||
3956 | // We do not look into tasks right now, just give up. | |||
3957 | SPMDCompatibilityTracker.insert(&CB); | |||
3958 | ReachedUnknownParallelRegions.insert(&CB); | |||
3959 | break; | |||
3960 | case OMPRTL___kmpc_alloc_shared: | |||
3961 | case OMPRTL___kmpc_free_shared: | |||
3962 | // Return without setting a fixpoint, to be resolved in updateImpl. | |||
3963 | return; | |||
3964 | default: | |||
3965 | // Unknown OpenMP runtime calls cannot be executed in SPMD-mode, | |||
3966 | // generally. However, they do not hide parallel regions. | |||
3967 | SPMDCompatibilityTracker.insert(&CB); | |||
3968 | break; | |||
3969 | } | |||
3970 | // All other OpenMP runtime calls will not reach parallel regions so they | |||
3971 | // can be safely ignored for now. Since it is a known OpenMP runtime call we | |||
3972 | // have now modeled all effects and there is no need for any update. | |||
3973 | indicateOptimisticFixpoint(); | |||
3974 | } | |||
3975 | ||||
3976 | ChangeStatus updateImpl(Attributor &A) override { | |||
3977 | // TODO: Once we have call site specific value information we can provide | |||
3978 | // call site specific liveness information and then it makes | |||
3979 | // sense to specialize attributes for call sites arguments instead of | |||
3980 | // redirecting requests to the callee argument. | |||
3981 | Function *F = getAssociatedFunction(); | |||
3982 | ||||
3983 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
3984 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F); | |||
3985 | ||||
3986 | // If F is not a runtime function, propagate the AAKernelInfo of the callee. | |||
3987 | if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { | |||
3988 | const IRPosition &FnPos = IRPosition::function(*F); | |||
3989 | auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED); | |||
3990 | if (getState() == FnAA.getState()) | |||
3991 | return ChangeStatus::UNCHANGED; | |||
3992 | getState() = FnAA.getState(); | |||
3993 | return ChangeStatus::CHANGED; | |||
3994 | } | |||
3995 | ||||
3996 | // F is a runtime function that allocates or frees memory, check | |||
3997 | // AAHeapToStack and AAHeapToShared. | |||
3998 | KernelInfoState StateBefore = getState(); | |||
3999 | assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call" ) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4001, __extension__ __PRETTY_FUNCTION__)) | |||
4000 | It->getSecond() == OMPRTL___kmpc_free_shared) &&(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call" ) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4001, __extension__ __PRETTY_FUNCTION__)) | |||
4001 | "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call")(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call" ) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4001, __extension__ __PRETTY_FUNCTION__)); | |||
4002 | ||||
4003 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
4004 | ||||
4005 | auto &HeapToStackAA = A.getAAFor<AAHeapToStack>( | |||
4006 | *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); | |||
4007 | auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>( | |||
4008 | *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); | |||
4009 | ||||
4010 | RuntimeFunction RF = It->getSecond(); | |||
4011 | ||||
4012 | switch (RF) { | |||
4013 | // If neither HeapToStack nor HeapToShared assume the call is removed, | |||
4014 | // assume SPMD incompatibility. | |||
4015 | case OMPRTL___kmpc_alloc_shared: | |||
4016 | if (!HeapToStackAA.isAssumedHeapToStack(CB) && | |||
4017 | !HeapToSharedAA.isAssumedHeapToShared(CB)) | |||
4018 | SPMDCompatibilityTracker.insert(&CB); | |||
4019 | break; | |||
4020 | case OMPRTL___kmpc_free_shared: | |||
4021 | if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) && | |||
4022 | !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB)) | |||
4023 | SPMDCompatibilityTracker.insert(&CB); | |||
4024 | break; | |||
4025 | default: | |||
4026 | SPMDCompatibilityTracker.insert(&CB); | |||
4027 | } | |||
4028 | ||||
4029 | return StateBefore == getState() ? ChangeStatus::UNCHANGED | |||
4030 | : ChangeStatus::CHANGED; | |||
4031 | } | |||
4032 | }; | |||
4033 | ||||
4034 | struct AAFoldRuntimeCall | |||
4035 | : public StateWrapper<BooleanState, AbstractAttribute> { | |||
4036 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | |||
4037 | ||||
4038 | AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
4039 | ||||
4040 | /// Statistics are tracked as part of manifest for now. | |||
4041 | void trackStatistics() const override {} | |||
4042 | ||||
4043 | /// Create an abstract attribute biew for the position \p IRP. | |||
4044 | static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP, | |||
4045 | Attributor &A); | |||
4046 | ||||
4047 | /// See AbstractAttribute::getName() | |||
4048 | const std::string getName() const override { return "AAFoldRuntimeCall"; } | |||
4049 | ||||
4050 | /// See AbstractAttribute::getIdAddr() | |||
4051 | const char *getIdAddr() const override { return &ID; } | |||
4052 | ||||
4053 | /// This function should return true if the type of the \p AA is | |||
4054 | /// AAFoldRuntimeCall | |||
4055 | static bool classof(const AbstractAttribute *AA) { | |||
4056 | return (AA->getIdAddr() == &ID); | |||
4057 | } | |||
4058 | ||||
4059 | static const char ID; | |||
4060 | }; | |||
4061 | ||||
4062 | struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { | |||
4063 | AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A) | |||
4064 | : AAFoldRuntimeCall(IRP, A) {} | |||
4065 | ||||
4066 | /// See AbstractAttribute::getAsStr() | |||
4067 | const std::string getAsStr() const override { | |||
4068 | if (!isValidState()) | |||
4069 | return "<invalid>"; | |||
4070 | ||||
4071 | std::string Str("simplified value: "); | |||
4072 | ||||
4073 | if (!SimplifiedValue.hasValue()) | |||
4074 | return Str + std::string("none"); | |||
4075 | ||||
4076 | if (!SimplifiedValue.getValue()) | |||
4077 | return Str + std::string("nullptr"); | |||
4078 | ||||
4079 | if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue())) | |||
4080 | return Str + std::to_string(CI->getSExtValue()); | |||
4081 | ||||
4082 | return Str + std::string("unknown"); | |||
4083 | } | |||
4084 | ||||
4085 | void initialize(Attributor &A) override { | |||
4086 | if (DisableOpenMPOptFolding) | |||
4087 | indicatePessimisticFixpoint(); | |||
4088 | ||||
4089 | Function *Callee = getAssociatedFunction(); | |||
4090 | ||||
4091 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
4092 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); | |||
4093 | assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&(static_cast <bool> (It != OMPInfoCache.RuntimeFunctionIDMap .end() && "Expected a known OpenMP runtime function") ? void (0) : __assert_fail ("It != OMPInfoCache.RuntimeFunctionIDMap.end() && \"Expected a known OpenMP runtime function\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4094, __extension__ __PRETTY_FUNCTION__)) | |||
4094 | "Expected a known OpenMP runtime function")(static_cast <bool> (It != OMPInfoCache.RuntimeFunctionIDMap .end() && "Expected a known OpenMP runtime function") ? void (0) : __assert_fail ("It != OMPInfoCache.RuntimeFunctionIDMap.end() && \"Expected a known OpenMP runtime function\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4094, __extension__ __PRETTY_FUNCTION__)); | |||
4095 | ||||
4096 | RFKind = It->getSecond(); | |||
4097 | ||||
4098 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
4099 | A.registerSimplificationCallback( | |||
4100 | IRPosition::callsite_returned(CB), | |||
4101 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
4102 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
4103 | assert((isValidState() || (SimplifiedValue.hasValue() &&(static_cast <bool> ((isValidState() || (SimplifiedValue .hasValue() && SimplifiedValue.getValue() == nullptr) ) && "Unexpected invalid state!") ? void (0) : __assert_fail ("(isValidState() || (SimplifiedValue.hasValue() && SimplifiedValue.getValue() == nullptr)) && \"Unexpected invalid state!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4105, __extension__ __PRETTY_FUNCTION__)) | |||
4104 | SimplifiedValue.getValue() == nullptr)) &&(static_cast <bool> ((isValidState() || (SimplifiedValue .hasValue() && SimplifiedValue.getValue() == nullptr) ) && "Unexpected invalid state!") ? void (0) : __assert_fail ("(isValidState() || (SimplifiedValue.hasValue() && SimplifiedValue.getValue() == nullptr)) && \"Unexpected invalid state!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4105, __extension__ __PRETTY_FUNCTION__)) | |||
4105 | "Unexpected invalid state!")(static_cast <bool> ((isValidState() || (SimplifiedValue .hasValue() && SimplifiedValue.getValue() == nullptr) ) && "Unexpected invalid state!") ? void (0) : __assert_fail ("(isValidState() || (SimplifiedValue.hasValue() && SimplifiedValue.getValue() == nullptr)) && \"Unexpected invalid state!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4105, __extension__ __PRETTY_FUNCTION__)); | |||
4106 | ||||
4107 | if (!isAtFixpoint()) { | |||
4108 | UsedAssumedInformation = true; | |||
4109 | if (AA) | |||
4110 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
4111 | } | |||
4112 | return SimplifiedValue; | |||
4113 | }); | |||
4114 | } | |||
4115 | ||||
4116 | ChangeStatus updateImpl(Attributor &A) override { | |||
4117 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
4118 | switch (RFKind) { | |||
4119 | case OMPRTL___kmpc_is_spmd_exec_mode: | |||
4120 | Changed |= foldIsSPMDExecMode(A); | |||
4121 | break; | |||
4122 | case OMPRTL___kmpc_is_generic_main_thread_id: | |||
4123 | Changed |= foldIsGenericMainThread(A); | |||
4124 | break; | |||
4125 | case OMPRTL___kmpc_parallel_level: | |||
4126 | Changed |= foldParallelLevel(A); | |||
4127 | break; | |||
4128 | case OMPRTL___kmpc_get_hardware_num_threads_in_block: | |||
4129 | Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit"); | |||
4130 | break; | |||
4131 | case OMPRTL___kmpc_get_hardware_num_blocks: | |||
4132 | Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams"); | |||
4133 | break; | |||
4134 | default: | |||
4135 | llvm_unreachable("Unhandled OpenMP runtime function!")::llvm::llvm_unreachable_internal("Unhandled OpenMP runtime function!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4135); | |||
4136 | } | |||
4137 | ||||
4138 | return Changed; | |||
4139 | } | |||
4140 | ||||
4141 | ChangeStatus manifest(Attributor &A) override { | |||
4142 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
4143 | ||||
4144 | if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) { | |||
4145 | Instruction &I = *getCtxI(); | |||
4146 | A.changeValueAfterManifest(I, **SimplifiedValue); | |||
4147 | A.deleteAfterManifest(I); | |||
4148 | ||||
4149 | CallBase *CB = dyn_cast<CallBase>(&I); | |||
4150 | auto Remark = [&](OptimizationRemark OR) { | |||
4151 | if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue)) | |||
4152 | return OR << "Replacing OpenMP runtime call " | |||
4153 | << CB->getCalledFunction()->getName() << " with " | |||
4154 | << ore::NV("FoldedValue", C->getZExtValue()) << "."; | |||
4155 | return OR << "Replacing OpenMP runtime call " | |||
4156 | << CB->getCalledFunction()->getName() << "."; | |||
4157 | }; | |||
4158 | ||||
4159 | if (CB && EnableVerboseRemarks) | |||
4160 | A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark); | |||
4161 | ||||
4162 | LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replacing runtime call: " << I << " with " << **SimplifiedValue << "\n"; } } while (false) | |||
4163 | << **SimplifiedValue << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Replacing runtime call: " << I << " with " << **SimplifiedValue << "\n"; } } while (false); | |||
4164 | ||||
4165 | Changed = ChangeStatus::CHANGED; | |||
4166 | } | |||
4167 | ||||
4168 | return Changed; | |||
4169 | } | |||
4170 | ||||
4171 | ChangeStatus indicatePessimisticFixpoint() override { | |||
4172 | SimplifiedValue = nullptr; | |||
4173 | return AAFoldRuntimeCall::indicatePessimisticFixpoint(); | |||
4174 | } | |||
4175 | ||||
4176 | private: | |||
4177 | /// Fold __kmpc_is_spmd_exec_mode into a constant if possible. | |||
4178 | ChangeStatus foldIsSPMDExecMode(Attributor &A) { | |||
4179 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
4180 | ||||
4181 | unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | |||
4182 | unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | |||
4183 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | |||
4184 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
4185 | ||||
4186 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | |||
4187 | return indicatePessimisticFixpoint(); | |||
4188 | ||||
4189 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | |||
4190 | auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | |||
4191 | DepClassTy::REQUIRED); | |||
4192 | ||||
4193 | if (!AA.isValidState()) { | |||
4194 | SimplifiedValue = nullptr; | |||
4195 | return indicatePessimisticFixpoint(); | |||
4196 | } | |||
4197 | ||||
4198 | if (AA.SPMDCompatibilityTracker.isAssumed()) { | |||
4199 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
4200 | ++KnownSPMDCount; | |||
4201 | else | |||
4202 | ++AssumedSPMDCount; | |||
4203 | } else { | |||
4204 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
4205 | ++KnownNonSPMDCount; | |||
4206 | else | |||
4207 | ++AssumedNonSPMDCount; | |||
4208 | } | |||
4209 | } | |||
4210 | ||||
4211 | if ((AssumedSPMDCount + KnownSPMDCount) && | |||
4212 | (AssumedNonSPMDCount + KnownNonSPMDCount)) | |||
4213 | return indicatePessimisticFixpoint(); | |||
4214 | ||||
4215 | auto &Ctx = getAnchorValue().getContext(); | |||
4216 | if (KnownSPMDCount || AssumedSPMDCount) { | |||
4217 | assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4218, __extension__ __PRETTY_FUNCTION__)) | |||
4218 | "Expected only SPMD kernels!")(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4218, __extension__ __PRETTY_FUNCTION__)); | |||
4219 | // All reaching kernels are in SPMD mode. Update all function calls to | |||
4220 | // __kmpc_is_spmd_exec_mode to 1. | |||
4221 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); | |||
4222 | } else if (KnownNonSPMDCount || AssumedNonSPMDCount) { | |||
4223 | assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4224, __extension__ __PRETTY_FUNCTION__)) | |||
4224 | "Expected only non-SPMD kernels!")(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4224, __extension__ __PRETTY_FUNCTION__)); | |||
4225 | // All reaching kernels are in non-SPMD mode. Update all function | |||
4226 | // calls to __kmpc_is_spmd_exec_mode to 0. | |||
4227 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); | |||
4228 | } else { | |||
4229 | // We have empty reaching kernels, therefore we cannot tell if the | |||
4230 | // associated call site can be folded. At this moment, SimplifiedValue | |||
4231 | // must be none. | |||
4232 | assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none")(static_cast <bool> (!SimplifiedValue.hasValue() && "SimplifiedValue should be none") ? void (0) : __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should be none\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4232, __extension__ __PRETTY_FUNCTION__)); | |||
4233 | } | |||
4234 | ||||
4235 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
4236 | : ChangeStatus::CHANGED; | |||
4237 | } | |||
4238 | ||||
4239 | /// Fold __kmpc_is_generic_main_thread_id into a constant if possible. | |||
4240 | ChangeStatus foldIsGenericMainThread(Attributor &A) { | |||
4241 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
4242 | ||||
4243 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
4244 | Function *F = CB.getFunction(); | |||
4245 | const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( | |||
4246 | *this, IRPosition::function(*F), DepClassTy::REQUIRED); | |||
4247 | ||||
4248 | if (!ExecutionDomainAA.isValidState()) | |||
4249 | return indicatePessimisticFixpoint(); | |||
4250 | ||||
4251 | auto &Ctx = getAnchorValue().getContext(); | |||
4252 | if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB)) | |||
4253 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); | |||
4254 | else | |||
4255 | return indicatePessimisticFixpoint(); | |||
4256 | ||||
4257 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
4258 | : ChangeStatus::CHANGED; | |||
4259 | } | |||
4260 | ||||
4261 | /// Fold __kmpc_parallel_level into a constant if possible. | |||
4262 | ChangeStatus foldParallelLevel(Attributor &A) { | |||
4263 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
4264 | ||||
4265 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | |||
4266 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
4267 | ||||
4268 | if (!CallerKernelInfoAA.ParallelLevels.isValidState()) | |||
4269 | return indicatePessimisticFixpoint(); | |||
4270 | ||||
4271 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | |||
4272 | return indicatePessimisticFixpoint(); | |||
4273 | ||||
4274 | if (CallerKernelInfoAA.ReachingKernelEntries.empty()) { | |||
4275 | assert(!SimplifiedValue.hasValue() &&(static_cast <bool> (!SimplifiedValue.hasValue() && "SimplifiedValue should keep none at this point") ? void (0) : __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should keep none at this point\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4276, __extension__ __PRETTY_FUNCTION__)) | |||
4276 | "SimplifiedValue should keep none at this point")(static_cast <bool> (!SimplifiedValue.hasValue() && "SimplifiedValue should keep none at this point") ? void (0) : __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should keep none at this point\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4276, __extension__ __PRETTY_FUNCTION__)); | |||
4277 | return ChangeStatus::UNCHANGED; | |||
4278 | } | |||
4279 | ||||
4280 | unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | |||
4281 | unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | |||
4282 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | |||
4283 | auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | |||
4284 | DepClassTy::REQUIRED); | |||
4285 | if (!AA.SPMDCompatibilityTracker.isValidState()) | |||
4286 | return indicatePessimisticFixpoint(); | |||
4287 | ||||
4288 | if (AA.SPMDCompatibilityTracker.isAssumed()) { | |||
4289 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
4290 | ++KnownSPMDCount; | |||
4291 | else | |||
4292 | ++AssumedSPMDCount; | |||
4293 | } else { | |||
4294 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
4295 | ++KnownNonSPMDCount; | |||
4296 | else | |||
4297 | ++AssumedNonSPMDCount; | |||
4298 | } | |||
4299 | } | |||
4300 | ||||
4301 | if ((AssumedSPMDCount + KnownSPMDCount) && | |||
4302 | (AssumedNonSPMDCount + KnownNonSPMDCount)) | |||
4303 | return indicatePessimisticFixpoint(); | |||
4304 | ||||
4305 | auto &Ctx = getAnchorValue().getContext(); | |||
4306 | // If the caller can only be reached by SPMD kernel entries, the parallel | |||
4307 | // level is 1. Similarly, if the caller can only be reached by non-SPMD | |||
4308 | // kernel entries, it is 0. | |||
4309 | if (AssumedSPMDCount || KnownSPMDCount) { | |||
4310 | assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4311, __extension__ __PRETTY_FUNCTION__)) | |||
4311 | "Expected only SPMD kernels!")(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail ("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4311, __extension__ __PRETTY_FUNCTION__)); | |||
4312 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); | |||
4313 | } else { | |||
4314 | assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4315, __extension__ __PRETTY_FUNCTION__)) | |||
4315 | "Expected only non-SPMD kernels!")(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!") ? void (0 ) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4315, __extension__ __PRETTY_FUNCTION__)); | |||
4316 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0); | |||
4317 | } | |||
4318 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
4319 | : ChangeStatus::CHANGED; | |||
4320 | } | |||
4321 | ||||
4322 | ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) { | |||
4323 | // Specialize only if all the calls agree with the attribute constant value | |||
4324 | int32_t CurrentAttrValue = -1; | |||
4325 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
4326 | ||||
4327 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | |||
4328 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
4329 | ||||
4330 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | |||
4331 | return indicatePessimisticFixpoint(); | |||
4332 | ||||
4333 | // Iterate over the kernels that reach this function | |||
4334 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | |||
4335 | int32_t NextAttrVal = -1; | |||
4336 | if (K->hasFnAttribute(Attr)) | |||
4337 | NextAttrVal = | |||
4338 | std::stoi(K->getFnAttribute(Attr).getValueAsString().str()); | |||
4339 | ||||
4340 | if (NextAttrVal == -1 || | |||
4341 | (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal)) | |||
4342 | return indicatePessimisticFixpoint(); | |||
4343 | CurrentAttrValue = NextAttrVal; | |||
4344 | } | |||
4345 | ||||
4346 | if (CurrentAttrValue != -1) { | |||
4347 | auto &Ctx = getAnchorValue().getContext(); | |||
4348 | SimplifiedValue = | |||
4349 | ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue); | |||
4350 | } | |||
4351 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
4352 | : ChangeStatus::CHANGED; | |||
4353 | } | |||
4354 | ||||
4355 | /// An optional value the associated value is assumed to fold to. That is, we | |||
4356 | /// assume the associated value (which is a call) can be replaced by this | |||
4357 | /// simplified value. | |||
4358 | Optional<Value *> SimplifiedValue; | |||
4359 | ||||
4360 | /// The runtime function kind of the callee of the associated call site. | |||
4361 | RuntimeFunction RFKind; | |||
4362 | }; | |||
4363 | ||||
4364 | } // namespace | |||
4365 | ||||
4366 | /// Register folding callsite | |||
4367 | void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { | |||
4368 | auto &RFI = OMPInfoCache.RFIs[RF]; | |||
4369 | RFI.foreachUse(SCC, [&](Use &U, Function &F) { | |||
4370 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI); | |||
4371 | if (!CI) | |||
4372 | return false; | |||
4373 | A.getOrCreateAAFor<AAFoldRuntimeCall>( | |||
4374 | IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr, | |||
4375 | DepClassTy::NONE, /* ForceUpdate */ false, | |||
4376 | /* UpdateAfterInit */ false); | |||
4377 | return false; | |||
4378 | }); | |||
4379 | } | |||
4380 | ||||
4381 | void OpenMPOpt::registerAAs(bool IsModulePass) { | |||
4382 | if (SCC.empty()) | |||
4383 | ||||
4384 | return; | |||
4385 | if (IsModulePass) { | |||
4386 | // Ensure we create the AAKernelInfo AAs first and without triggering an | |||
4387 | // update. This will make sure we register all value simplification | |||
4388 | // callbacks before any other AA has the chance to create an AAValueSimplify | |||
4389 | // or similar. | |||
4390 | for (Function *Kernel : OMPInfoCache.Kernels) | |||
4391 | A.getOrCreateAAFor<AAKernelInfo>( | |||
4392 | IRPosition::function(*Kernel), /* QueryingAA */ nullptr, | |||
4393 | DepClassTy::NONE, /* ForceUpdate */ false, | |||
4394 | /* UpdateAfterInit */ false); | |||
4395 | ||||
4396 | registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id); | |||
4397 | registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); | |||
4398 | registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level); | |||
4399 | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block); | |||
4400 | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks); | |||
4401 | } | |||
4402 | ||||
4403 | // Create CallSite AA for all Getters. | |||
4404 | for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { | |||
4405 | auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; | |||
4406 | ||||
4407 | auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; | |||
4408 | ||||
4409 | auto CreateAA = [&](Use &U, Function &Caller) { | |||
4410 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); | |||
4411 | if (!CI) | |||
4412 | return false; | |||
4413 | ||||
4414 | auto &CB = cast<CallBase>(*CI); | |||
4415 | ||||
4416 | IRPosition CBPos = IRPosition::callsite_function(CB); | |||
4417 | A.getOrCreateAAFor<AAICVTracker>(CBPos); | |||
4418 | return false; | |||
4419 | }; | |||
4420 | ||||
4421 | GetterRFI.foreachUse(SCC, CreateAA); | |||
4422 | } | |||
4423 | auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
4424 | auto CreateAA = [&](Use &U, Function &F) { | |||
4425 | A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F)); | |||
4426 | return false; | |||
4427 | }; | |||
4428 | if (!DisableOpenMPOptDeglobalization) | |||
4429 | GlobalizationRFI.foreachUse(SCC, CreateAA); | |||
4430 | ||||
4431 | // Create an ExecutionDomain AA for every function and a HeapToStack AA for | |||
4432 | // every function if there is a device kernel. | |||
4433 | if (!isOpenMPDevice(M)) | |||
4434 | return; | |||
4435 | ||||
4436 | for (auto *F : SCC) { | |||
4437 | if (F->isDeclaration()) | |||
4438 | continue; | |||
4439 | ||||
4440 | A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F)); | |||
4441 | if (!DisableOpenMPOptDeglobalization) | |||
4442 | A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F)); | |||
4443 | ||||
4444 | for (auto &I : instructions(*F)) { | |||
4445 | if (auto *LI = dyn_cast<LoadInst>(&I)) { | |||
4446 | bool UsedAssumedInformation = false; | |||
4447 | A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, | |||
4448 | UsedAssumedInformation); | |||
4449 | } | |||
4450 | } | |||
4451 | } | |||
4452 | } | |||
4453 | ||||
4454 | const char AAICVTracker::ID = 0; | |||
4455 | const char AAKernelInfo::ID = 0; | |||
4456 | const char AAExecutionDomain::ID = 0; | |||
4457 | const char AAHeapToShared::ID = 0; | |||
4458 | const char AAFoldRuntimeCall::ID = 0; | |||
4459 | ||||
4460 | AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, | |||
4461 | Attributor &A) { | |||
4462 | AAICVTracker *AA = nullptr; | |||
4463 | switch (IRP.getPositionKind()) { | |||
4464 | case IRPosition::IRP_INVALID: | |||
4465 | case IRPosition::IRP_FLOAT: | |||
4466 | case IRPosition::IRP_ARGUMENT: | |||
4467 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
4468 | llvm_unreachable("ICVTracker can only be created for function position!")::llvm::llvm_unreachable_internal("ICVTracker can only be created for function position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4468); | |||
4469 | case IRPosition::IRP_RETURNED: | |||
4470 | AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); | |||
4471 | break; | |||
4472 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
4473 | AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); | |||
4474 | break; | |||
4475 | case IRPosition::IRP_CALL_SITE: | |||
4476 | AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); | |||
4477 | break; | |||
4478 | case IRPosition::IRP_FUNCTION: | |||
4479 | AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); | |||
4480 | break; | |||
4481 | } | |||
4482 | ||||
4483 | return *AA; | |||
4484 | } | |||
4485 | ||||
4486 | AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, | |||
4487 | Attributor &A) { | |||
4488 | AAExecutionDomainFunction *AA = nullptr; | |||
4489 | switch (IRP.getPositionKind()) { | |||
4490 | case IRPosition::IRP_INVALID: | |||
4491 | case IRPosition::IRP_FLOAT: | |||
4492 | case IRPosition::IRP_ARGUMENT: | |||
4493 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
4494 | case IRPosition::IRP_RETURNED: | |||
4495 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
4496 | case IRPosition::IRP_CALL_SITE: | |||
4497 | llvm_unreachable(::llvm::llvm_unreachable_internal("AAExecutionDomain can only be created for function position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4498) | |||
4498 | "AAExecutionDomain can only be created for function position!")::llvm::llvm_unreachable_internal("AAExecutionDomain can only be created for function position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4498); | |||
4499 | case IRPosition::IRP_FUNCTION: | |||
4500 | AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); | |||
4501 | break; | |||
4502 | } | |||
4503 | ||||
4504 | return *AA; | |||
4505 | } | |||
4506 | ||||
4507 | AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, | |||
4508 | Attributor &A) { | |||
4509 | AAHeapToSharedFunction *AA = nullptr; | |||
4510 | switch (IRP.getPositionKind()) { | |||
4511 | case IRPosition::IRP_INVALID: | |||
4512 | case IRPosition::IRP_FLOAT: | |||
4513 | case IRPosition::IRP_ARGUMENT: | |||
4514 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
4515 | case IRPosition::IRP_RETURNED: | |||
4516 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
4517 | case IRPosition::IRP_CALL_SITE: | |||
4518 | llvm_unreachable(::llvm::llvm_unreachable_internal("AAHeapToShared can only be created for function position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4519) | |||
4519 | "AAHeapToShared can only be created for function position!")::llvm::llvm_unreachable_internal("AAHeapToShared can only be created for function position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4519); | |||
4520 | case IRPosition::IRP_FUNCTION: | |||
4521 | AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); | |||
4522 | break; | |||
4523 | } | |||
4524 | ||||
4525 | return *AA; | |||
4526 | } | |||
4527 | ||||
4528 | AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP, | |||
4529 | Attributor &A) { | |||
4530 | AAKernelInfo *AA = nullptr; | |||
4531 | switch (IRP.getPositionKind()) { | |||
4532 | case IRPosition::IRP_INVALID: | |||
4533 | case IRPosition::IRP_FLOAT: | |||
4534 | case IRPosition::IRP_ARGUMENT: | |||
4535 | case IRPosition::IRP_RETURNED: | |||
4536 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
4537 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
4538 | llvm_unreachable("KernelInfo can only be created for function position!")::llvm::llvm_unreachable_internal("KernelInfo can only be created for function position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4538); | |||
4539 | case IRPosition::IRP_CALL_SITE: | |||
4540 | AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A); | |||
4541 | break; | |||
4542 | case IRPosition::IRP_FUNCTION: | |||
4543 | AA = new (A.Allocator) AAKernelInfoFunction(IRP, A); | |||
4544 | break; | |||
4545 | } | |||
4546 | ||||
4547 | return *AA; | |||
4548 | } | |||
4549 | ||||
4550 | AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP, | |||
4551 | Attributor &A) { | |||
4552 | AAFoldRuntimeCall *AA = nullptr; | |||
4553 | switch (IRP.getPositionKind()) { | |||
4554 | case IRPosition::IRP_INVALID: | |||
4555 | case IRPosition::IRP_FLOAT: | |||
4556 | case IRPosition::IRP_ARGUMENT: | |||
4557 | case IRPosition::IRP_RETURNED: | |||
4558 | case IRPosition::IRP_FUNCTION: | |||
4559 | case IRPosition::IRP_CALL_SITE: | |||
4560 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
4561 | llvm_unreachable("KernelInfo can only be created for call site position!")::llvm::llvm_unreachable_internal("KernelInfo can only be created for call site position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp" , 4561); | |||
4562 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
4563 | AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A); | |||
4564 | break; | |||
4565 | } | |||
4566 | ||||
4567 | return *AA; | |||
4568 | } | |||
4569 | ||||
4570 | PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { | |||
4571 | if (!containsOpenMP(M)) | |||
4572 | return PreservedAnalyses::all(); | |||
4573 | if (DisableOpenMPOptimizations) | |||
4574 | return PreservedAnalyses::all(); | |||
4575 | ||||
4576 | FunctionAnalysisManager &FAM = | |||
4577 | AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | |||
4578 | KernelSet Kernels = getDeviceKernels(M); | |||
4579 | ||||
4580 | auto IsCalled = [&](Function &F) { | |||
4581 | if (Kernels.contains(&F)) | |||
4582 | return true; | |||
4583 | for (const User *U : F.users()) | |||
4584 | if (!isa<BlockAddress>(U)) | |||
4585 | return true; | |||
4586 | return false; | |||
4587 | }; | |||
4588 | ||||
4589 | auto EmitRemark = [&](Function &F) { | |||
4590 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); | |||
4591 | ORE.emit([&]() { | |||
4592 | OptimizationRemarkAnalysis ORA(DEBUG_TYPE"openmp-opt", "OMP140", &F); | |||
4593 | return ORA << "Could not internalize function. " | |||
4594 | << "Some optimizations may not be possible. [OMP140]"; | |||
4595 | }); | |||
4596 | }; | |||
4597 | ||||
4598 | // Create internal copies of each function if this is a kernel Module. This | |||
4599 | // allows iterprocedural passes to see every call edge. | |||
4600 | DenseMap<Function *, Function *> InternalizedMap; | |||
4601 | if (isOpenMPDevice(M)) { | |||
4602 | SmallPtrSet<Function *, 16> InternalizeFns; | |||
4603 | for (Function &F : M) | |||
4604 | if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && | |||
4605 | !DisableInternalization) { | |||
4606 | if (Attributor::isInternalizable(F)) { | |||
4607 | InternalizeFns.insert(&F); | |||
4608 | } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { | |||
4609 | EmitRemark(F); | |||
4610 | } | |||
4611 | } | |||
4612 | ||||
4613 | Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); | |||
4614 | } | |||
4615 | ||||
4616 | // Look at every function in the Module unless it was internalized. | |||
4617 | SmallVector<Function *, 16> SCC; | |||
4618 | for (Function &F : M) | |||
4619 | if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) | |||
4620 | SCC.push_back(&F); | |||
4621 | ||||
4622 | if (SCC.empty()) | |||
4623 | return PreservedAnalyses::all(); | |||
4624 | ||||
4625 | AnalysisGetter AG(FAM); | |||
4626 | ||||
4627 | auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { | |||
4628 | return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); | |||
4629 | }; | |||
4630 | ||||
4631 | BumpPtrAllocator Allocator; | |||
4632 | CallGraphUpdater CGUpdater; | |||
4633 | ||||
4634 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | |||
4635 | OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); | |||
4636 | ||||
4637 | unsigned MaxFixpointIterations = | |||
4638 | (isOpenMPDevice(M)) ? SetFixpointIterations : 32; | |||
4639 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, | |||
4640 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | |||
4641 | ||||
4642 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | |||
4643 | bool Changed = OMPOpt.run(true); | |||
4644 | ||||
4645 | // Optionally inline device functions for potentially better performance. | |||
4646 | if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M)) | |||
4647 | for (Function &F : M) | |||
4648 | if (!F.isDeclaration() && !Kernels.contains(&F) && | |||
4649 | !F.hasFnAttribute(Attribute::NoInline)) | |||
4650 | F.addFnAttr(Attribute::AlwaysInline); | |||
4651 | ||||
4652 | if (PrintModuleAfterOptimizations) | |||
4653 | LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M; } } while (false); | |||
4654 | ||||
4655 | if (Changed) | |||
4656 | return PreservedAnalyses::none(); | |||
4657 | ||||
4658 | return PreservedAnalyses::all(); | |||
4659 | } | |||
4660 | ||||
4661 | PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, | |||
4662 | CGSCCAnalysisManager &AM, | |||
4663 | LazyCallGraph &CG, | |||
4664 | CGSCCUpdateResult &UR) { | |||
4665 | if (!containsOpenMP(*C.begin()->getFunction().getParent())) | |||
4666 | return PreservedAnalyses::all(); | |||
4667 | if (DisableOpenMPOptimizations) | |||
4668 | return PreservedAnalyses::all(); | |||
4669 | ||||
4670 | SmallVector<Function *, 16> SCC; | |||
4671 | // If there are kernels in the module, we have to run on all SCC's. | |||
4672 | for (LazyCallGraph::Node &N : C) { | |||
4673 | Function *Fn = &N.getFunction(); | |||
4674 | SCC.push_back(Fn); | |||
4675 | } | |||
4676 | ||||
4677 | if (SCC.empty()) | |||
4678 | return PreservedAnalyses::all(); | |||
4679 | ||||
4680 | Module &M = *C.begin()->getFunction().getParent(); | |||
4681 | ||||
4682 | KernelSet Kernels = getDeviceKernels(M); | |||
4683 | ||||
4684 | FunctionAnalysisManager &FAM = | |||
4685 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); | |||
4686 | ||||
4687 | AnalysisGetter AG(FAM); | |||
4688 | ||||
4689 | auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { | |||
4690 | return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); | |||
4691 | }; | |||
4692 | ||||
4693 | BumpPtrAllocator Allocator; | |||
4694 | CallGraphUpdater CGUpdater; | |||
4695 | CGUpdater.initialize(CG, C, AM, UR); | |||
4696 | ||||
4697 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | |||
4698 | OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, | |||
4699 | /*CGSCC*/ Functions, Kernels); | |||
4700 | ||||
4701 | unsigned MaxFixpointIterations = | |||
4702 | (isOpenMPDevice(M)) ? SetFixpointIterations : 32; | |||
4703 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, | |||
4704 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | |||
4705 | ||||
4706 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | |||
4707 | bool Changed = OMPOpt.run(false); | |||
4708 | ||||
4709 | if (PrintModuleAfterOptimizations) | |||
4710 | LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M; } } while (false); | |||
4711 | ||||
4712 | if (Changed) | |||
4713 | return PreservedAnalyses::none(); | |||
4714 | ||||
4715 | return PreservedAnalyses::all(); | |||
4716 | } | |||
4717 | ||||
4718 | namespace { | |||
4719 | ||||
4720 | struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { | |||
4721 | CallGraphUpdater CGUpdater; | |||
4722 | static char ID; | |||
4723 | ||||
4724 | OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) { | |||
4725 | initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); | |||
4726 | } | |||
4727 | ||||
4728 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
4729 | CallGraphSCCPass::getAnalysisUsage(AU); | |||
4730 | } | |||
4731 | ||||
4732 | bool runOnSCC(CallGraphSCC &CGSCC) override { | |||
4733 | if (!containsOpenMP(CGSCC.getCallGraph().getModule())) | |||
4734 | return false; | |||
4735 | if (DisableOpenMPOptimizations || skipSCC(CGSCC)) | |||
4736 | return false; | |||
4737 | ||||
4738 | SmallVector<Function *, 16> SCC; | |||
4739 | // If there are kernels in the module, we have to run on all SCC's. | |||
4740 | for (CallGraphNode *CGN : CGSCC) { | |||
4741 | Function *Fn = CGN->getFunction(); | |||
4742 | if (!Fn || Fn->isDeclaration()) | |||
4743 | continue; | |||
4744 | SCC.push_back(Fn); | |||
4745 | } | |||
4746 | ||||
4747 | if (SCC.empty()) | |||
4748 | return false; | |||
4749 | ||||
4750 | Module &M = CGSCC.getCallGraph().getModule(); | |||
4751 | KernelSet Kernels = getDeviceKernels(M); | |||
4752 | ||||
4753 | CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); | |||
4754 | CGUpdater.initialize(CG, CGSCC); | |||
4755 | ||||
4756 | // Maintain a map of functions to avoid rebuilding the ORE | |||
4757 | DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; | |||
4758 | auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { | |||
4759 | std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; | |||
4760 | if (!ORE) | |||
4761 | ORE = std::make_unique<OptimizationRemarkEmitter>(F); | |||
4762 | return *ORE; | |||
4763 | }; | |||
4764 | ||||
4765 | AnalysisGetter AG; | |||
4766 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | |||
4767 | BumpPtrAllocator Allocator; | |||
4768 | OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, | |||
4769 | Allocator, | |||
4770 | /*CGSCC*/ Functions, Kernels); | |||
4771 | ||||
4772 | unsigned MaxFixpointIterations = | |||
4773 | (isOpenMPDevice(M)) ? SetFixpointIterations : 32; | |||
4774 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, | |||
4775 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | |||
4776 | ||||
4777 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | |||
4778 | bool Result = OMPOpt.run(false); | |||
4779 | ||||
4780 | if (PrintModuleAfterOptimizations) | |||
4781 | LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M; } } while (false); | |||
4782 | ||||
4783 | return Result; | |||
4784 | } | |||
4785 | ||||
4786 | bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } | |||
4787 | }; | |||
4788 | ||||
4789 | } // end anonymous namespace | |||
4790 | ||||
4791 | KernelSet llvm::omp::getDeviceKernels(Module &M) { | |||
4792 | // TODO: Create a more cross-platform way of determining device kernels. | |||
4793 | NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); | |||
4794 | KernelSet Kernels; | |||
4795 | ||||
4796 | if (!MD) | |||
4797 | return Kernels; | |||
4798 | ||||
4799 | for (auto *Op : MD->operands()) { | |||
4800 | if (Op->getNumOperands() < 2) | |||
4801 | continue; | |||
4802 | MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); | |||
4803 | if (!KindID || KindID->getString() != "kernel") | |||
4804 | continue; | |||
4805 | ||||
4806 | Function *KernelFn = | |||
4807 | mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); | |||
4808 | if (!KernelFn) | |||
4809 | continue; | |||
4810 | ||||
4811 | ++NumOpenMPTargetRegionKernels; | |||
4812 | ||||
4813 | Kernels.insert(KernelFn); | |||
4814 | } | |||
4815 | ||||
4816 | return Kernels; | |||
4817 | } | |||
4818 | ||||
4819 | bool llvm::omp::containsOpenMP(Module &M) { | |||
4820 | Metadata *MD = M.getModuleFlag("openmp"); | |||
4821 | if (!MD) | |||
4822 | return false; | |||
4823 | ||||
4824 | return true; | |||
4825 | } | |||
4826 | ||||
4827 | bool llvm::omp::isOpenMPDevice(Module &M) { | |||
4828 | Metadata *MD = M.getModuleFlag("openmp-device"); | |||
4829 | if (!MD) | |||
4830 | return false; | |||
4831 | ||||
4832 | return true; | |||
4833 | } | |||
4834 | ||||
4835 | char OpenMPOptCGSCCLegacyPass::ID = 0; | |||
4836 | ||||
4837 | INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry &Registry) { | |||
4838 | "OpenMP specific optimizations", false, false)static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry &Registry) { | |||
4839 | INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)initializeCallGraphWrapperPassPass(Registry); | |||
4840 | INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",PassInfo *PI = new PassInfo( "OpenMP specific optimizations", "openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo ::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass> ), false, false); Registry.registerPass(*PI, true); return PI ; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag ; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag , initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry )); } | |||
4841 | "OpenMP specific optimizations", false, false)PassInfo *PI = new PassInfo( "OpenMP specific optimizations", "openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo ::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass> ), false, false); Registry.registerPass(*PI, true); return PI ; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag ; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag , initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry )); } | |||
4842 | ||||
4843 | Pass *llvm::createOpenMPOptCGSCCLegacyPass() { | |||
4844 | return new OpenMPOptCGSCCLegacyPass(); | |||
4845 | } |
1 | //===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Attributor: An inter procedural (abstract) "attribute" deduction framework. |
10 | // |
11 | // The Attributor framework is an inter procedural abstract analysis (fixpoint |
12 | // iteration analysis). The goal is to allow easy deduction of new attributes as |
13 | // well as information exchange between abstract attributes in-flight. |
14 | // |
15 | // The Attributor class is the driver and the link between the various abstract |
16 | // attributes. The Attributor will iterate until a fixpoint state is reached by |
17 | // all abstract attributes in-flight, or until it will enforce a pessimistic fix |
18 | // point because an iteration limit is reached. |
19 | // |
20 | // Abstract attributes, derived from the AbstractAttribute class, actually |
21 | // describe properties of the code. They can correspond to actual LLVM-IR |
22 | // attributes, or they can be more general, ultimately unrelated to LLVM-IR |
23 | // attributes. The latter is useful when an abstract attribute provides |
24 | // information to other abstract attributes in-flight but we might not want to |
25 | // manifest the information. The Attributor allows to query in-flight abstract |
26 | // attributes through the `Attributor::getAAFor` method (see the method |
27 | // description for an example). If the method is used by an abstract attribute |
28 | // P, and it results in an abstract attribute Q, the Attributor will |
29 | // automatically capture a potential dependence from Q to P. This dependence |
30 | // will cause P to be reevaluated whenever Q changes in the future. |
31 | // |
32 | // The Attributor will only reevaluate abstract attributes that might have |
33 | // changed since the last iteration. That means that the Attributor will not |
34 | // revisit all instructions/blocks/functions in the module but only query |
35 | // an update from a subset of the abstract attributes. |
36 | // |
37 | // The update method `AbstractAttribute::updateImpl` is implemented by the |
38 | // specific "abstract attribute" subclasses. The method is invoked whenever the |
39 | // currently assumed state (see the AbstractState class) might not be valid |
40 | // anymore. This can, for example, happen if the state was dependent on another |
41 | // abstract attribute that changed. In every invocation, the update method has |
42 | // to adjust the internal state of an abstract attribute to a point that is |
43 | // justifiable by the underlying IR and the current state of abstract attributes |
44 | // in-flight. Since the IR is given and assumed to be valid, the information |
45 | // derived from it can be assumed to hold. However, information derived from |
46 | // other abstract attributes is conditional on various things. If the justifying |
47 | // state changed, the `updateImpl` has to revisit the situation and potentially |
48 | // find another justification or limit the optimistic assumptions made. |
49 | // |
50 | // Change is the key in this framework. Until a state of no-change, thus a |
51 | // fixpoint, is reached, the Attributor will query the abstract attributes |
52 | // in-flight to re-evaluate their state. If the (current) state is too |
53 | // optimistic, hence it cannot be justified anymore through other abstract |
54 | // attributes or the state of the IR, the state of the abstract attribute will |
55 | // have to change. Generally, we assume abstract attribute state to be a finite |
56 | // height lattice and the update function to be monotone. However, these |
57 | // conditions are not enforced because the iteration limit will guarantee |
58 | // termination. If an optimistic fixpoint is reached, or a pessimistic fix |
59 | // point is enforced after a timeout, the abstract attributes are tasked to |
60 | // manifest their result in the IR for passes to come. |
61 | // |
62 | // Attribute manifestation is not mandatory. If desired, there is support to |
63 | // generate a single or multiple LLVM-IR attributes already in the helper struct |
64 | // IRAttribute. In the simplest case, a subclass inherits from IRAttribute with |
65 | // a proper Attribute::AttrKind as template parameter. The Attributor |
66 | // manifestation framework will then create and place a new attribute if it is |
67 | // allowed to do so (based on the abstract state). Other use cases can be |
68 | // achieved by overloading AbstractAttribute or IRAttribute methods. |
69 | // |
70 | // |
71 | // The "mechanics" of adding a new "abstract attribute": |
72 | // - Define a class (transitively) inheriting from AbstractAttribute and one |
73 | // (which could be the same) that (transitively) inherits from AbstractState. |
74 | // For the latter, consider the already available BooleanState and |
75 | // {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., you require only a |
76 | // number tracking or bit-encoding. |
77 | // - Implement all pure methods. Also use overloading if the attribute is not |
78 | // conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for |
79 | // an argument, call site argument, function return value, or function. See |
80 | // the class and method descriptions for more information on the two |
81 | // "Abstract" classes and their respective methods. |
82 | // - Register opportunities for the new abstract attribute in the |
83 | // `Attributor::identifyDefaultAbstractAttributes` method if it should be |
84 | // counted as a 'default' attribute. |
85 | // - Add sufficient tests. |
86 | // - Add a Statistics object for bookkeeping. If it is a simple (set of) |
87 | // attribute(s) manifested through the Attributor manifestation framework, see |
88 | // the bookkeeping function in Attributor.cpp. |
89 | // - If instructions with a certain opcode are interesting to the attribute, add |
90 | // that opcode to the switch in `Attributor::identifyAbstractAttributes`. This |
91 | // will make it possible to query all those instructions through the |
92 | // `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the |
93 | // need to traverse the IR repeatedly. |
94 | // |
95 | //===----------------------------------------------------------------------===// |
96 | |
97 | #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H |
98 | #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H |
99 | |
100 | #include "llvm/ADT/DenseSet.h" |
101 | #include "llvm/ADT/GraphTraits.h" |
102 | #include "llvm/ADT/MapVector.h" |
103 | #include "llvm/ADT/STLExtras.h" |
104 | #include "llvm/ADT/SetOperations.h" |
105 | #include "llvm/ADT/SetVector.h" |
106 | #include "llvm/ADT/Triple.h" |
107 | #include "llvm/ADT/iterator.h" |
108 | #include "llvm/Analysis/AssumeBundleQueries.h" |
109 | #include "llvm/Analysis/CFG.h" |
110 | #include "llvm/Analysis/CGSCCPassManager.h" |
111 | #include "llvm/Analysis/LazyCallGraph.h" |
112 | #include "llvm/Analysis/LoopInfo.h" |
113 | #include "llvm/Analysis/MustExecute.h" |
114 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
115 | #include "llvm/Analysis/PostDominators.h" |
116 | #include "llvm/Analysis/TargetLibraryInfo.h" |
117 | #include "llvm/IR/AbstractCallSite.h" |
118 | #include "llvm/IR/ConstantRange.h" |
119 | #include "llvm/IR/PassManager.h" |
120 | #include "llvm/Support/Allocator.h" |
121 | #include "llvm/Support/Casting.h" |
122 | #include "llvm/Support/GraphWriter.h" |
123 | #include "llvm/Support/TimeProfiler.h" |
124 | #include "llvm/Transforms/Utils/CallGraphUpdater.h" |
125 | |
126 | namespace llvm { |
127 | |
128 | struct AADepGraphNode; |
129 | struct AADepGraph; |
130 | struct Attributor; |
131 | struct AbstractAttribute; |
132 | struct InformationCache; |
133 | struct AAIsDead; |
134 | struct AttributorCallGraph; |
135 | |
136 | class AAManager; |
137 | class AAResults; |
138 | class Function; |
139 | |
140 | /// Abstract Attribute helper functions. |
141 | namespace AA { |
142 | |
143 | /// Return true if \p V is dynamically unique, that is, there are no two |
144 | /// "instances" of \p V at runtime with different values. |
145 | bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, |
146 | const Value &V); |
147 | |
148 | /// Return true if \p V is a valid value in \p Scope, that is a constant or an |
149 | /// instruction/argument of \p Scope. |
150 | bool isValidInScope(const Value &V, const Function *Scope); |
151 | |
152 | /// Return true if \p V is a valid value at position \p CtxI, that is a |
153 | /// constant, an argument of the same function as \p CtxI, or an instruction in |
154 | /// that function that dominates \p CtxI. |
155 | bool isValidAtPosition(const Value &V, const Instruction &CtxI, |
156 | InformationCache &InfoCache); |
157 | |
158 | /// Try to convert \p V to type \p Ty without introducing new instructions. If |
159 | /// this is not possible return `nullptr`. Note: this function basically knows |
160 | /// how to cast various constants. |
161 | Value *getWithType(Value &V, Type &Ty); |
162 | |
163 | /// Return the combination of \p A and \p B such that the result is a possible |
164 | /// value of both. \p B is potentially cast to match the type \p Ty or the |
165 | /// type of \p A if \p Ty is null. |
166 | /// |
167 | /// Examples: |
168 | /// X + none => X |
169 | /// not_none + undef => not_none |
170 | /// V1 + V2 => nullptr |
171 | Optional<Value *> |
172 | combineOptionalValuesInAAValueLatice(const Optional<Value *> &A, |
173 | const Optional<Value *> &B, Type *Ty); |
174 | |
175 | /// Return the initial value of \p Obj with type \p Ty if that is a constant. |
176 | Constant *getInitialValueForObj(Value &Obj, Type &Ty); |
177 | |
178 | /// Collect all potential underlying objects of \p Ptr at position \p CtxI in |
179 | /// \p Objects. Assumed information is used and dependences onto \p QueryingAA |
180 | /// are added appropriately. |
181 | /// |
182 | /// \returns True if \p Objects contains all assumed underlying objects, and |
183 | /// false if something went wrong and the objects could not be |
184 | /// determined. |
185 | bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, |
186 | SmallVectorImpl<Value *> &Objects, |
187 | const AbstractAttribute &QueryingAA, |
188 | const Instruction *CtxI); |
189 | |
190 | /// Collect all potential copies of the value stored by \p SI into |
191 | /// \p PotentialCopies. That is, all copies that were made via the store |
192 | /// are assumed to be known and contained in \p PotentialCopies. Dependences |
193 | /// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will |
194 | /// inform the caller if assumed information was used. |
195 | /// |
196 | /// \returns True if the assumed potential copies are all in \p PotentialCopies, |
197 | /// false if something went wrong and the copies could not be |
198 | /// determined. |
199 | bool getPotentialCopiesOfStoredValue( |
200 | Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies, |
201 | const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation); |
202 | |
203 | } // namespace AA |
204 | |
205 | /// The value passed to the command line option that defines the maximal initialization |
206 | /// chain length. |
207 | extern unsigned MaxInitializationChainLength; |
208 | |
209 | ///{ |
/// Two-valued status that distinguishes "changed" from "unchanged". Values are
/// combined through the operator| / operator& overloads declared for this type.
enum class ChangeStatus {
  /// Something changed.
  CHANGED,
  /// Nothing changed.
  UNCHANGED,
};
214 | |
215 | ChangeStatus operator|(ChangeStatus l, ChangeStatus r); |
216 | ChangeStatus &operator|=(ChangeStatus &l, ChangeStatus r); |
217 | ChangeStatus operator&(ChangeStatus l, ChangeStatus r); |
218 | ChangeStatus &operator&=(ChangeStatus &l, ChangeStatus r); |
219 | |
/// Classification of a dependence between a source and a target.
enum class DepClassTy {
  /// The target cannot be valid if the source is not.
  REQUIRED,
  /// The target may be valid if the source is not.
  OPTIONAL,
  /// Do not track a dependence between source and target.
  NONE,
};
225 | ///} |
226 | |
227 | /// The data structure for the nodes of a dependency graph |
228 | struct AADepGraphNode { |
229 | public: |
230 | virtual ~AADepGraphNode(){}; |
231 | using DepTy = PointerIntPair<AADepGraphNode *, 1>; |
232 | |
233 | protected: |
234 | /// Set of dependency graph nodes which should be updated if this one |
235 | /// is updated. The bit encodes if it is optional. |
236 | TinyPtrVector<DepTy> Deps; |
237 | |
238 | static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } |
239 | static AbstractAttribute *DepGetValAA(DepTy &DT) { |
240 | return cast<AbstractAttribute>(DT.getPointer()); |
241 | } |
242 | |
243 | operator AbstractAttribute *() { return cast<AbstractAttribute>(this); } |
244 | |
245 | public: |
246 | using iterator = |
247 | mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>; |
248 | using aaiterator = |
249 | mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>; |
250 | |
251 | aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); } |
252 | aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); } |
253 | iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); } |
254 | iterator child_end() { return iterator(Deps.end(), &DepGetVal); } |
255 | |
256 | virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; } |
257 | TinyPtrVector<DepTy> &getDeps() { return Deps; } |
258 | |
259 | friend struct Attributor; |
260 | friend struct AADepGraph; |
261 | }; |
262 | |
263 | /// The data structure for the dependency graph |
264 | /// |
265 | /// Note that in this graph if there is an edge from A to B (A -> B), |
266 | /// then it means that B depends on A, and when the state of A is |
267 | /// updated, node B should also be updated |
268 | struct AADepGraph { |
269 | AADepGraph() {} |
270 | ~AADepGraph() {} |
271 | |
272 | using DepTy = AADepGraphNode::DepTy; |
273 | static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } |
274 | using iterator = |
275 | mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>; |
276 | |
277 | /// There is no root node for the dependency graph. But the SCCIterator |
278 | /// requires a single entry point, so we maintain a fake("synthetic") root |
279 | /// node that depends on every node. |
280 | AADepGraphNode SyntheticRoot; |
281 | AADepGraphNode *GetEntryNode() { return &SyntheticRoot; } |
282 | |
283 | iterator begin() { return SyntheticRoot.child_begin(); } |
284 | iterator end() { return SyntheticRoot.child_end(); } |
285 | |
286 | void viewGraph(); |
287 | |
288 | /// Dump graph to file |
289 | void dumpGraph(); |
290 | |
291 | /// Print dependency graph |
292 | void print(); |
293 | }; |
294 | |
295 | /// Helper to describe and deal with positions in the LLVM-IR. |
296 | /// |
297 | /// A position in the IR is described by an anchor value and an "offset" that |
298 | /// could be the argument number, for call sites and arguments, or an indicator |
299 | /// of the "position kind". The kinds, specified in the Kind enum below, include |
300 | /// the locations in the attribute list, i.a., function scope and return value, |
301 | /// as well as a distinction between call sites and functions. Finally, there |
302 | /// are floating values that do not have a corresponding attribute list |
303 | /// position. |
304 | struct IRPosition { |
305 | // NOTE: In the future this definition can be changed to support recursive |
306 | // functions. |
307 | using CallBaseContext = CallBase; |
308 | |
/// The positions we distinguish in the IR.
enum Kind : char {
  /// An invalid position.
  IRP_INVALID,
  /// A position that is not associated with a spot suitable for attributes.
  /// This could be any value or instruction.
  IRP_FLOAT,
  /// An attribute for the function return value.
  IRP_RETURNED,
  /// An attribute for a call site return value.
  IRP_CALL_SITE_RETURNED,
  /// An attribute for a function (scope).
  IRP_FUNCTION,
  /// An attribute for a call site (function scope).
  IRP_CALL_SITE,
  /// An attribute for a function argument.
  IRP_ARGUMENT,
  /// An attribute for a call site argument.
  IRP_CALL_SITE_ARGUMENT,
};
321 | |
322 | /// Default constructor available to create invalid positions implicitly. All |
323 | /// other positions need to be created explicitly through the appropriate |
324 | /// static member function. |
325 | IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); } |
326 | |
327 | /// Create a position describing the value of \p V. |
328 | static const IRPosition value(const Value &V, |
329 | const CallBaseContext *CBContext = nullptr) { |
330 | if (auto *Arg = dyn_cast<Argument>(&V)) |
331 | return IRPosition::argument(*Arg, CBContext); |
332 | if (auto *CB = dyn_cast<CallBase>(&V)) |
333 | return IRPosition::callsite_returned(*CB); |
334 | return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext); |
335 | } |
336 | |
337 | /// Create a position describing the function scope of \p F. |
338 | /// \p CBContext is used for call base specific analysis. |
339 | static const IRPosition function(const Function &F, |
340 | const CallBaseContext *CBContext = nullptr) { |
341 | return IRPosition(const_cast<Function &>(F), IRP_FUNCTION, CBContext); |
342 | } |
343 | |
344 | /// Create a position describing the returned value of \p F. |
345 | /// \p CBContext is used for call base specific analysis. |
346 | static const IRPosition returned(const Function &F, |
347 | const CallBaseContext *CBContext = nullptr) { |
348 | return IRPosition(const_cast<Function &>(F), IRP_RETURNED, CBContext); |
349 | } |
350 | |
351 | /// Create a position describing the argument \p Arg. |
352 | /// \p CBContext is used for call base specific analysis. |
353 | static const IRPosition argument(const Argument &Arg, |
354 | const CallBaseContext *CBContext = nullptr) { |
355 | return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT, CBContext); |
356 | } |
357 | |
358 | /// Create a position describing the function scope of \p CB. |
359 | static const IRPosition callsite_function(const CallBase &CB) { |
360 | return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE); |
361 | } |
362 | |
363 | /// Create a position describing the returned value of \p CB. |
364 | static const IRPosition callsite_returned(const CallBase &CB) { |
365 | return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED); |
366 | } |
367 | |
368 | /// Create a position describing the argument of \p CB at position \p ArgNo. |
369 | static const IRPosition callsite_argument(const CallBase &CB, |
370 | unsigned ArgNo) { |
371 | return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)), |
372 | IRP_CALL_SITE_ARGUMENT); |
373 | } |
374 | |
375 | /// Create a position describing the argument of \p ACS at position \p ArgNo. |
376 | static const IRPosition callsite_argument(AbstractCallSite ACS, |
377 | unsigned ArgNo) { |
378 | if (ACS.getNumArgOperands() <= ArgNo) |
379 | return IRPosition(); |
380 | int CSArgNo = ACS.getCallArgOperandNo(ArgNo); |
381 | if (CSArgNo >= 0) |
382 | return IRPosition::callsite_argument( |
383 | cast<CallBase>(*ACS.getInstruction()), CSArgNo); |
384 | return IRPosition(); |
385 | } |
386 | |
387 | /// Create a position with function scope matching the "context" of \p IRP. |
388 | /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result |
389 | /// will be a call site position, otherwise the function position of the |
390 | /// associated function. |
391 | static const IRPosition |
392 | function_scope(const IRPosition &IRP, |
393 | const CallBaseContext *CBContext = nullptr) { |
394 | if (IRP.isAnyCallSitePosition()) { |
395 | return IRPosition::callsite_function( |
396 | cast<CallBase>(IRP.getAnchorValue())); |
397 | } |
398 | assert(IRP.getAssociatedFunction())(static_cast <bool> (IRP.getAssociatedFunction()) ? void (0) : __assert_fail ("IRP.getAssociatedFunction()", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 398, __extension__ __PRETTY_FUNCTION__)); |
399 | return IRPosition::function(*IRP.getAssociatedFunction(), CBContext); |
400 | } |
401 | |
402 | bool operator==(const IRPosition &RHS) const { |
403 | return Enc == RHS.Enc && RHS.CBContext == CBContext; |
404 | } |
405 | bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); } |
406 | |
407 | /// Return the value this abstract attribute is anchored with. |
408 | /// |
409 | /// The anchor value might not be the associated value if the latter is not |
410 | /// sufficient to determine where arguments will be manifested. This is, so |
411 | /// far, only the case for call site arguments as the value is not sufficient |
412 | /// to pinpoint them. Instead, we can use the call site as an anchor. |
413 | Value &getAnchorValue() const { |
414 | switch (getEncodingBits()) { |
415 | case ENC_VALUE: |
416 | case ENC_RETURNED_VALUE: |
417 | case ENC_FLOATING_FUNCTION: |
418 | return *getAsValuePtr(); |
419 | case ENC_CALL_SITE_ARGUMENT_USE: |
420 | return *(getAsUsePtr()->getUser()); |
421 | default: |
422 | llvm_unreachable("Unkown encoding!")::llvm::llvm_unreachable_internal("Unkown encoding!", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 422); |
423 | }; |
424 | } |
425 | |
426 | /// Return the associated function, if any. |
427 | Function *getAssociatedFunction() const { |
428 | if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) { |
429 | // We reuse the logic that associates callback calles to arguments of a |
430 | // call site here to identify the callback callee as the associated |
431 | // function. |
432 | if (Argument *Arg = getAssociatedArgument()) |
433 | return Arg->getParent(); |
434 | return CB->getCalledFunction(); |
435 | } |
436 | return getAnchorScope(); |
437 | } |
438 | |
439 | /// Return the associated argument, if any. |
440 | Argument *getAssociatedArgument() const; |
441 | |
442 | /// Return true if the position refers to a function interface, that is the |
443 | /// function scope, the function return, or an argument. |
444 | bool isFnInterfaceKind() const { |
445 | switch (getPositionKind()) { |
446 | case IRPosition::IRP_FUNCTION: |
447 | case IRPosition::IRP_RETURNED: |
448 | case IRPosition::IRP_ARGUMENT: |
449 | return true; |
450 | default: |
451 | return false; |
452 | } |
453 | } |
454 | |
455 | /// Return the Function surrounding the anchor value. |
456 | Function *getAnchorScope() const { |
457 | Value &V = getAnchorValue(); |
458 | if (isa<Function>(V)) |
459 | return &cast<Function>(V); |
460 | if (isa<Argument>(V)) |
461 | return cast<Argument>(V).getParent(); |
462 | if (isa<Instruction>(V)) |
463 | return cast<Instruction>(V).getFunction(); |
464 | return nullptr; |
465 | } |
466 | |
467 | /// Return the context instruction, if any. |
468 | Instruction *getCtxI() const { |
469 | Value &V = getAnchorValue(); |
470 | if (auto *I = dyn_cast<Instruction>(&V)) |
471 | return I; |
472 | if (auto *Arg = dyn_cast<Argument>(&V)) |
473 | if (!Arg->getParent()->isDeclaration()) |
474 | return &Arg->getParent()->getEntryBlock().front(); |
475 | if (auto *F = dyn_cast<Function>(&V)) |
476 | if (!F->isDeclaration()) |
477 | return &(F->getEntryBlock().front()); |
478 | return nullptr; |
479 | } |
480 | |
481 | /// Return the value this abstract attribute is associated with. |
482 | Value &getAssociatedValue() const { |
483 | if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue())) |
484 | return getAnchorValue(); |
485 | assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!")(static_cast <bool> (isa<CallBase>(&getAnchorValue ()) && "Expected a call base!") ? void (0) : __assert_fail ("isa<CallBase>(&getAnchorValue()) && \"Expected a call base!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 485, __extension__ __PRETTY_FUNCTION__)); |
486 | return *cast<CallBase>(&getAnchorValue()) |
487 | ->getArgOperand(getCallSiteArgNo()); |
488 | } |
489 | |
490 | /// Return the type this abstract attribute is associated with. |
491 | Type *getAssociatedType() const { |
492 | if (getPositionKind() == IRPosition::IRP_RETURNED) |
493 | return getAssociatedFunction()->getReturnType(); |
494 | return getAssociatedValue().getType(); |
495 | } |
496 | |
497 | /// Return the callee argument number of the associated value if it is an |
498 | /// argument or call site argument, otherwise a negative value. In contrast to |
499 | /// `getCallSiteArgNo` this method will always return the "argument number" |
500 | /// from the perspective of the callee. This may not the same as the call site |
501 | /// if this is a callback call. |
502 | int getCalleeArgNo() const { |
503 | return getArgNo(/* CallbackCalleeArgIfApplicable */ true); |
504 | } |
505 | |
506 | /// Return the call site argument number of the associated value if it is an |
507 | /// argument or call site argument, otherwise a negative value. In contrast to |
508 | /// `getCalleArgNo` this method will always return the "operand number" from |
509 | /// the perspective of the call site. This may not the same as the callee |
510 | /// perspective if this is a callback call. |
511 | int getCallSiteArgNo() const { |
512 | return getArgNo(/* CallbackCalleeArgIfApplicable */ false); |
513 | } |
514 | |
515 | /// Return the index in the attribute list for this position. |
516 | unsigned getAttrIdx() const { |
517 | switch (getPositionKind()) { |
518 | case IRPosition::IRP_INVALID: |
519 | case IRPosition::IRP_FLOAT: |
520 | break; |
521 | case IRPosition::IRP_FUNCTION: |
522 | case IRPosition::IRP_CALL_SITE: |
523 | return AttributeList::FunctionIndex; |
524 | case IRPosition::IRP_RETURNED: |
525 | case IRPosition::IRP_CALL_SITE_RETURNED: |
526 | return AttributeList::ReturnIndex; |
527 | case IRPosition::IRP_ARGUMENT: |
528 | case IRPosition::IRP_CALL_SITE_ARGUMENT: |
529 | return getCallSiteArgNo() + AttributeList::FirstArgIndex; |
530 | } |
531 | llvm_unreachable(::llvm::llvm_unreachable_internal("There is no attribute index for a floating or invalid position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 532) |
532 | "There is no attribute index for a floating or invalid position!")::llvm::llvm_unreachable_internal("There is no attribute index for a floating or invalid position!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 532); |
533 | } |
534 | |
535 | /// Return the associated position kind. |
536 | Kind getPositionKind() const { |
537 | char EncodingBits = getEncodingBits(); |
538 | if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE) |
539 | return IRP_CALL_SITE_ARGUMENT; |
540 | if (EncodingBits == ENC_FLOATING_FUNCTION) |
541 | return IRP_FLOAT; |
542 | |
543 | Value *V = getAsValuePtr(); |
544 | if (!V) |
545 | return IRP_INVALID; |
546 | if (isa<Argument>(V)) |
547 | return IRP_ARGUMENT; |
548 | if (isa<Function>(V)) |
549 | return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION; |
550 | if (isa<CallBase>(V)) |
551 | return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED |
552 | : IRP_CALL_SITE; |
553 | return IRP_FLOAT; |
554 | } |
555 | |
556 | /// TODO: Figure out if the attribute related helper functions should live |
557 | /// here or somewhere else. |
558 | |
559 | /// Return true if any kind in \p AKs exists in the IR at a position that |
560 | /// will affect this one. See also getAttrs(...). |
561 | /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, |
562 | /// e.g., the function position if this is an |
563 | /// argument position, should be ignored. |
564 | bool hasAttr(ArrayRef<Attribute::AttrKind> AKs, |
565 | bool IgnoreSubsumingPositions = false, |
566 | Attributor *A = nullptr) const; |
567 | |
568 | /// Return the attributes of any kind in \p AKs existing in the IR at a |
569 | /// position that will affect this one. While each position can only have a |
570 | /// single attribute of any kind in \p AKs, there are "subsuming" positions |
571 | /// that could have an attribute as well. This method returns all attributes |
572 | /// found in \p Attrs. |
573 | /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, |
574 | /// e.g., the function position if this is an |
575 | /// argument position, should be ignored. |
576 | void getAttrs(ArrayRef<Attribute::AttrKind> AKs, |
577 | SmallVectorImpl<Attribute> &Attrs, |
578 | bool IgnoreSubsumingPositions = false, |
579 | Attributor *A = nullptr) const; |
580 | |
581 | /// Remove the attribute of kind \p AKs existing in the IR at this position. |
582 | void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const { |
583 | if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) |
584 | return; |
585 | |
586 | AttributeList AttrList; |
587 | auto *CB = dyn_cast<CallBase>(&getAnchorValue()); |
588 | if (CB) |
589 | AttrList = CB->getAttributes(); |
590 | else |
591 | AttrList = getAssociatedFunction()->getAttributes(); |
592 | |
593 | LLVMContext &Ctx = getAnchorValue().getContext(); |
594 | for (Attribute::AttrKind AK : AKs) |
595 | AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK); |
596 | |
597 | if (CB) |
598 | CB->setAttributes(AttrList); |
599 | else |
600 | getAssociatedFunction()->setAttributes(AttrList); |
601 | } |
602 | |
603 | bool isAnyCallSitePosition() const { |
604 | switch (getPositionKind()) { |
605 | case IRPosition::IRP_CALL_SITE: |
606 | case IRPosition::IRP_CALL_SITE_RETURNED: |
607 | case IRPosition::IRP_CALL_SITE_ARGUMENT: |
608 | return true; |
609 | default: |
610 | return false; |
611 | } |
612 | } |
613 | |
614 | /// Return true if the position is an argument or call site argument. |
615 | bool isArgumentPosition() const { |
616 | switch (getPositionKind()) { |
617 | case IRPosition::IRP_ARGUMENT: |
618 | case IRPosition::IRP_CALL_SITE_ARGUMENT: |
619 | return true; |
620 | default: |
621 | return false; |
622 | } |
623 | } |
624 | |
625 | /// Return the same position without the call base context. |
626 | IRPosition stripCallBaseContext() const { |
627 | IRPosition Result = *this; |
628 | Result.CBContext = nullptr; |
629 | return Result; |
630 | } |
631 | |
632 | /// Get the call base context from the position. |
633 | const CallBaseContext *getCallBaseContext() const { return CBContext; } |
634 | |
635 | /// Check if the position has any call base context. |
636 | bool hasCallBaseContext() const { return CBContext != nullptr; } |
637 | |
638 | /// Special DenseMap key values. |
639 | /// |
640 | ///{ |
641 | static const IRPosition EmptyKey; |
642 | static const IRPosition TombstoneKey; |
643 | ///} |
644 | |
645 | /// Conversion into a void * to allow reuse of pointer hashing. |
646 | operator void *() const { return Enc.getOpaqueValue(); } |
647 | |
648 | private: |
649 | /// Private constructor for special values only! |
650 | explicit IRPosition(void *Ptr, const CallBaseContext *CBContext = nullptr) |
651 | : CBContext(CBContext) { |
652 | Enc.setFromOpaqueValue(Ptr); |
653 | } |
654 | |
655 | /// IRPosition anchored at \p AnchorVal with kind/argument numbet \p PK. |
656 | explicit IRPosition(Value &AnchorVal, Kind PK, |
657 | const CallBaseContext *CBContext = nullptr) |
658 | : CBContext(CBContext) { |
659 | switch (PK) { |
660 | case IRPosition::IRP_INVALID: |
661 | llvm_unreachable("Cannot create invalid IRP with an anchor value!")::llvm::llvm_unreachable_internal("Cannot create invalid IRP with an anchor value!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 661); |
662 | break; |
663 | case IRPosition::IRP_FLOAT: |
664 | // Special case for floating functions. |
665 | if (isa<Function>(AnchorVal)) |
666 | Enc = {&AnchorVal, ENC_FLOATING_FUNCTION}; |
667 | else |
668 | Enc = {&AnchorVal, ENC_VALUE}; |
669 | break; |
670 | case IRPosition::IRP_FUNCTION: |
671 | case IRPosition::IRP_CALL_SITE: |
672 | Enc = {&AnchorVal, ENC_VALUE}; |
673 | break; |
674 | case IRPosition::IRP_RETURNED: |
675 | case IRPosition::IRP_CALL_SITE_RETURNED: |
676 | Enc = {&AnchorVal, ENC_RETURNED_VALUE}; |
677 | break; |
678 | case IRPosition::IRP_ARGUMENT: |
679 | Enc = {&AnchorVal, ENC_VALUE}; |
680 | break; |
681 | case IRPosition::IRP_CALL_SITE_ARGUMENT: |
682 | llvm_unreachable(::llvm::llvm_unreachable_internal("Cannot create call site argument IRP with an anchor value!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 683) |
683 | "Cannot create call site argument IRP with an anchor value!")::llvm::llvm_unreachable_internal("Cannot create call site argument IRP with an anchor value!" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 683); |
684 | break; |
685 | } |
686 | verify(); |
687 | } |
688 | |
689 | /// Return the callee argument number of the associated value if it is an |
690 | /// argument or call site argument. See also `getCalleeArgNo` and |
691 | /// `getCallSiteArgNo`. |
692 | int getArgNo(bool CallbackCalleeArgIfApplicable) const { |
693 | if (CallbackCalleeArgIfApplicable) |
694 | if (Argument *Arg = getAssociatedArgument()) |
695 | return Arg->getArgNo(); |
696 | switch (getPositionKind()) { |
697 | case IRPosition::IRP_ARGUMENT: |
698 | return cast<Argument>(getAsValuePtr())->getArgNo(); |
699 | case IRPosition::IRP_CALL_SITE_ARGUMENT: { |
700 | Use &U = *getAsUsePtr(); |
701 | return cast<CallBase>(U.getUser())->getArgOperandNo(&U); |
702 | } |
703 | default: |
704 | return -1; |
705 | } |
706 | } |
707 | |
708 | /// IRPosition for the use \p U. The position kind \p PK needs to be |
709 | /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value |
710 | /// the used value. |
711 | explicit IRPosition(Use &U, Kind PK) { |
712 | assert(PK == IRP_CALL_SITE_ARGUMENT &&(static_cast <bool> (PK == IRP_CALL_SITE_ARGUMENT && "Use constructor is for call site arguments only!") ? void ( 0) : __assert_fail ("PK == IRP_CALL_SITE_ARGUMENT && \"Use constructor is for call site arguments only!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 713, __extension__ __PRETTY_FUNCTION__)) |
713 | "Use constructor is for call site arguments only!")(static_cast <bool> (PK == IRP_CALL_SITE_ARGUMENT && "Use constructor is for call site arguments only!") ? void ( 0) : __assert_fail ("PK == IRP_CALL_SITE_ARGUMENT && \"Use constructor is for call site arguments only!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 713, __extension__ __PRETTY_FUNCTION__)); |
714 | Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE}; |
715 | verify(); |
716 | } |
717 | |
718 | /// Verify internal invariants. |
719 | void verify(); |
720 | |
721 | /// Return the attributes of kind \p AK existing in the IR as attribute. |
722 | bool getAttrsFromIRAttr(Attribute::AttrKind AK, |
723 | SmallVectorImpl<Attribute> &Attrs) const; |
724 | |
725 | /// Return the attributes of kind \p AK existing in the IR as operand bundles |
726 | /// of an llvm.assume. |
727 | bool getAttrsFromAssumes(Attribute::AttrKind AK, |
728 | SmallVectorImpl<Attribute> &Attrs, |
729 | Attributor &A) const; |
730 | |
731 | /// Return the underlying pointer as Value *, valid for all positions but |
732 | /// IRP_CALL_SITE_ARGUMENT. |
733 | Value *getAsValuePtr() const { |
734 | assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE &&(static_cast <bool> (getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 735, __extension__ __PRETTY_FUNCTION__)) |
735 | "Not a value pointer!")(static_cast <bool> (getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 735, __extension__ __PRETTY_FUNCTION__)); |
736 | return reinterpret_cast<Value *>(Enc.getPointer()); |
737 | } |
738 | |
739 | /// Return the underlying pointer as Use *, valid only for |
740 | /// IRP_CALL_SITE_ARGUMENT positions. |
741 | Use *getAsUsePtr() const { |
742 | assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE &&(static_cast <bool> (getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 743, __extension__ __PRETTY_FUNCTION__)) |
743 | "Not a value pointer!")(static_cast <bool> (getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && "Not a value pointer!") ? void (0) : __assert_fail ("getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 743, __extension__ __PRETTY_FUNCTION__)); |
744 | return reinterpret_cast<Use *>(Enc.getPointer()); |
745 | } |
746 | |
747 | /// Return true if \p EncodingBits describe a returned or call site returned |
748 | /// position. |
749 | static bool isReturnPosition(char EncodingBits) { |
750 | return EncodingBits == ENC_RETURNED_VALUE; |
751 | } |
752 | |
753 | /// Return true if the encoding bits describe a returned or call site returned |
754 | /// position. |
755 | bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); } |
756 | |
757 | /// The encoding of the IRPosition is a combination of a pointer and two |
758 | /// encoding bits. The values of the encoding bits are defined in the enum |
759 | /// below. The pointer is either a Value* (for the first three encoding bit |
760 | /// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE). |
761 | /// |
762 | ///{ |
763 | enum { |
764 | ENC_VALUE = 0b00, |
765 | ENC_RETURNED_VALUE = 0b01, |
766 | ENC_FLOATING_FUNCTION = 0b10, |
767 | ENC_CALL_SITE_ARGUMENT_USE = 0b11, |
768 | }; |
769 | |
770 | // Reserve the maximal amount of bits so there is no need to mask out the |
771 | // remaining ones. We will not encode anything else in the pointer anyway. |
772 | static constexpr int NumEncodingBits = |
773 | PointerLikeTypeTraits<void *>::NumLowBitsAvailable; |
774 | static_assert(NumEncodingBits >= 2, "At least two bits are required!"); |
775 | |
776 | /// The pointer with the encoding bits. |
777 | PointerIntPair<void *, NumEncodingBits, char> Enc; |
778 | ///} |
779 | |
780 | /// Call base context. Used for callsite specific analysis. |
781 | const CallBaseContext *CBContext = nullptr; |
782 | |
783 | /// Return the encoding bits. |
784 | char getEncodingBits() const { return Enc.getInt(); } |
785 | }; |
786 | |
787 | /// Helper that allows IRPosition as a key in a DenseMap. |
788 | template <> struct DenseMapInfo<IRPosition> { |
789 | static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; } |
790 | static inline IRPosition getTombstoneKey() { |
791 | return IRPosition::TombstoneKey; |
792 | } |
793 | static unsigned getHashValue(const IRPosition &IRP) { |
794 | return (DenseMapInfo<void *>::getHashValue(IRP) << 4) ^ |
795 | (DenseMapInfo<Value *>::getHashValue(IRP.getCallBaseContext())); |
796 | } |
797 | |
798 | static bool isEqual(const IRPosition &a, const IRPosition &b) { |
799 | return a == b; |
800 | } |
801 | }; |
802 | |
803 | /// A visitor class for IR positions. |
804 | /// |
805 | /// Given a position P, the SubsumingPositionIterator allows to visit "subsuming |
806 | /// positions" wrt. attributes/information. Thus, if a piece of information |
807 | /// holds for a subsuming position, it also holds for the position P. |
808 | /// |
809 | /// The subsuming positions always include the initial position and then, |
810 | /// depending on the position kind, additionally the following ones: |
811 | /// - for IRP_RETURNED: |
812 | /// - the function (IRP_FUNCTION) |
813 | /// - for IRP_ARGUMENT: |
814 | /// - the function (IRP_FUNCTION) |
815 | /// - for IRP_CALL_SITE: |
816 | /// - the callee (IRP_FUNCTION), if known |
817 | /// - for IRP_CALL_SITE_RETURNED: |
818 | /// - the callee (IRP_RETURNED), if known |
819 | /// - the call site (IRP_FUNCTION) |
820 | /// - the callee (IRP_FUNCTION), if known |
821 | /// - for IRP_CALL_SITE_ARGUMENT: |
822 | /// - the argument of the callee (IRP_ARGUMENT), if known |
823 | /// - the callee (IRP_FUNCTION), if known |
824 | /// - the position the call site argument is associated with if it is not |
825 | /// anchored to the call site, e.g., if it is an argument then the argument |
826 | /// (IRP_ARGUMENT) |
827 | class SubsumingPositionIterator { |
828 | SmallVector<IRPosition, 4> IRPositions; |
829 | using iterator = decltype(IRPositions)::iterator; |
830 | |
831 | public: |
832 | SubsumingPositionIterator(const IRPosition &IRP); |
833 | iterator begin() { return IRPositions.begin(); } |
834 | iterator end() { return IRPositions.end(); } |
835 | }; |
836 | |
837 | /// Wrapper for FunctoinAnalysisManager. |
838 | struct AnalysisGetter { |
839 | template <typename Analysis> |
840 | typename Analysis::Result *getAnalysis(const Function &F) { |
841 | if (!FAM || !F.getParent()) |
842 | return nullptr; |
843 | return &FAM->getResult<Analysis>(const_cast<Function &>(F)); |
844 | } |
845 | |
846 | AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {} |
847 | AnalysisGetter() {} |
848 | |
849 | private: |
850 | FunctionAnalysisManager *FAM = nullptr; |
851 | }; |
852 | |
853 | /// Data structure to hold cached (LLVM-IR) information. |
854 | /// |
855 | /// All attributes are given an InformationCache object at creation time to |
856 | /// avoid inspection of the IR by all of them individually. This default |
857 | /// InformationCache will hold information required by 'default' attributes, |
858 | /// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..) |
859 | /// is called. |
860 | /// |
861 | /// If custom abstract attributes, registered manually through |
862 | /// Attributor::registerAA(...), need more information, especially if it is not |
863 | /// reusable, it is advised to inherit from the InformationCache and cast the |
864 | /// instance down in the abstract attributes. |
865 | struct InformationCache { |
866 | InformationCache(const Module &M, AnalysisGetter &AG, |
867 | BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC) |
868 | : DL(M.getDataLayout()), Allocator(Allocator), |
869 | Explorer( |
870 | /* ExploreInterBlock */ true, /* ExploreCFGForward */ true, |
871 | /* ExploreCFGBackward */ true, |
872 | /* LIGetter */ |
873 | [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); }, |
874 | /* DTGetter */ |
875 | [&](const Function &F) { |
876 | return AG.getAnalysis<DominatorTreeAnalysis>(F); |
877 | }, |
878 | /* PDTGetter */ |
879 | [&](const Function &F) { |
880 | return AG.getAnalysis<PostDominatorTreeAnalysis>(F); |
881 | }), |
882 | AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) { |
883 | if (CGSCC) |
884 | initializeModuleSlice(*CGSCC); |
885 | } |
886 | |
887 | ~InformationCache() { |
888 | // The FunctionInfo objects are allocated via a BumpPtrAllocator, we call |
889 | // the destructor manually. |
890 | for (auto &It : FuncInfoMap) |
891 | It.getSecond()->~FunctionInfo(); |
892 | } |
893 | |
894 | /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is |
895 | /// true, constant expression users are not given to \p CB but their uses are |
896 | /// traversed transitively. |
897 | template <typename CBTy> |
898 | static void foreachUse(Function &F, CBTy CB, |
899 | bool LookThroughConstantExprUses = true) { |
900 | SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses())); |
901 | |
902 | for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) { |
903 | Use &U = *Worklist[Idx]; |
904 | |
905 | // Allow use in constant bitcasts and simply look through them. |
906 | if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) { |
907 | for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses()) |
908 | Worklist.push_back(&CEU); |
909 | continue; |
910 | } |
911 | |
912 | CB(U); |
913 | } |
914 | } |
915 | |
916 | /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains |
917 | /// (a subset of) all functions that we can look at during this SCC traversal. |
918 | /// This includes functions (transitively) called from the SCC and the |
919 | /// (transitive) callers of SCC functions. We also can look at a function if |
920 | /// there is a "reference edge", i.a., if the function somehow uses (!=calls) |
921 | /// a function in the SCC or a caller of a function in the SCC. |
922 | void initializeModuleSlice(SetVector<Function *> &SCC) { |
923 | ModuleSlice.insert(SCC.begin(), SCC.end()); |
924 | |
925 | SmallPtrSet<Function *, 16> Seen; |
926 | SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end()); |
927 | while (!Worklist.empty()) { |
928 | Function *F = Worklist.pop_back_val(); |
929 | ModuleSlice.insert(F); |
930 | |
931 | for (Instruction &I : instructions(*F)) |
932 | if (auto *CB = dyn_cast<CallBase>(&I)) |
933 | if (Function *Callee = CB->getCalledFunction()) |
934 | if (Seen.insert(Callee).second) |
935 | Worklist.push_back(Callee); |
936 | } |
937 | |
938 | Seen.clear(); |
939 | Worklist.append(SCC.begin(), SCC.end()); |
940 | while (!Worklist.empty()) { |
941 | Function *F = Worklist.pop_back_val(); |
942 | ModuleSlice.insert(F); |
943 | |
944 | // Traverse all transitive uses. |
945 | foreachUse(*F, [&](Use &U) { |
946 | if (auto *UsrI = dyn_cast<Instruction>(U.getUser())) |
947 | if (Seen.insert(UsrI->getFunction()).second) |
948 | Worklist.push_back(UsrI->getFunction()); |
949 | }); |
950 | } |
951 | } |
952 | |
953 | /// The slice of the module we are allowed to look at. |
954 | SmallPtrSet<Function *, 8> ModuleSlice; |
955 | |
956 | /// A vector type to hold instructions. |
957 | using InstructionVectorTy = SmallVector<Instruction *, 8>; |
958 | |
959 | /// A map type from opcodes to instructions with this opcode. |
960 | using OpcodeInstMapTy = DenseMap<unsigned, InstructionVectorTy *>; |
961 | |
962 | /// Return the map that relates "interesting" opcodes with all instructions |
963 | /// with that opcode in \p F. |
964 | OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) { |
965 | return getFunctionInfo(F).OpcodeInstMap; |
966 | } |
967 | |
968 | /// Return the instructions in \p F that may read or write memory. |
969 | InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) { |
970 | return getFunctionInfo(F).RWInsts; |
971 | } |
972 | |
973 | /// Return MustBeExecutedContextExplorer |
974 | MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() { |
975 | return Explorer; |
976 | } |
977 | |
978 | /// Return TargetLibraryInfo for function \p F. |
979 | TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) { |
980 | return AG.getAnalysis<TargetLibraryAnalysis>(F); |
981 | } |
982 | |
983 | /// Return AliasAnalysis Result for function \p F. |
984 | AAResults *getAAResultsForFunction(const Function &F); |
985 | |
986 | /// Return true if \p Arg is involved in a must-tail call, thus the argument |
987 | /// of the caller or callee. |
988 | bool isInvolvedInMustTailCall(const Argument &Arg) { |
989 | FunctionInfo &FI = getFunctionInfo(*Arg.getParent()); |
990 | return FI.CalledViaMustTail || FI.ContainsMustTailCall; |
991 | } |
992 | |
993 | /// Return the analysis result from a pass \p AP for function \p F. |
994 | template <typename AP> |
995 | typename AP::Result *getAnalysisResultForFunction(const Function &F) { |
996 | return AG.getAnalysis<AP>(F); |
997 | } |
998 | |
999 | /// Return SCC size on call graph for function \p F or 0 if unknown. |
1000 | unsigned getSccSize(const Function &F) { |
1001 | if (CGSCC && CGSCC->count(const_cast<Function *>(&F))) |
1002 | return CGSCC->size(); |
1003 | return 0; |
1004 | } |
1005 | |
1006 | /// Return datalayout used in the module. |
1007 | const DataLayout &getDL() { return DL; } |
1008 | |
1009 | /// Return the map conaining all the knowledge we have from `llvm.assume`s. |
1010 | const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; } |
1011 | |
1012 | /// Return if \p To is potentially reachable form \p From or not |
1013 | /// If the same query was answered, return cached result |
1014 | bool getPotentiallyReachable(const Instruction &From, const Instruction &To) { |
1015 | auto KeyPair = std::make_pair(&From, &To); |
1016 | auto Iter = PotentiallyReachableMap.find(KeyPair); |
1017 | if (Iter != PotentiallyReachableMap.end()) |
1018 | return Iter->second; |
1019 | const Function &F = *From.getFunction(); |
1020 | bool Result = true; |
1021 | if (From.getFunction() == To.getFunction()) |
1022 | Result = isPotentiallyReachable(&From, &To, nullptr, |
1023 | AG.getAnalysis<DominatorTreeAnalysis>(F), |
1024 | AG.getAnalysis<LoopAnalysis>(F)); |
1025 | PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result)); |
1026 | return Result; |
1027 | } |
1028 | |
1029 | /// Check whether \p F is part of module slice. |
1030 | bool isInModuleSlice(const Function &F) { |
1031 | return ModuleSlice.count(const_cast<Function *>(&F)); |
1032 | } |
1033 | |
1034 | /// Return true if the stack (llvm::Alloca) can be accessed by other threads. |
1035 | bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); } |
1036 | |
1037 | /// Return true if the target is a GPU. |
1038 | bool targetIsGPU() { |
1039 | return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX(); |
1040 | } |
1041 | |
1042 | private: |
1043 | struct FunctionInfo { |
1044 | ~FunctionInfo(); |
1045 | |
1046 | /// A nested map that remembers all instructions in a function with a |
1047 | /// certain instruction opcode (Instruction::getOpcode()). |
1048 | OpcodeInstMapTy OpcodeInstMap; |
1049 | |
1050 | /// A map from functions to their instructions that may read or write |
1051 | /// memory. |
1052 | InstructionVectorTy RWInsts; |
1053 | |
1054 | /// Function is called by a `musttail` call. |
1055 | bool CalledViaMustTail; |
1056 | |
1057 | /// Function contains a `musttail` call. |
1058 | bool ContainsMustTailCall; |
1059 | }; |
1060 | |
1061 | /// A map from functions to information about them. |
1062 | DenseMap<const Function *, FunctionInfo *> FuncInfoMap; |
1063 | |
1064 | /// Return information about the function \p F, potentially by creating it. |
1065 | FunctionInfo &getFunctionInfo(const Function &F) { |
1066 | FunctionInfo *&FI = FuncInfoMap[&F]; |
1067 | if (!FI) { |
1068 | FI = new (Allocator) FunctionInfo(); |
1069 | initializeInformationCache(F, *FI); |
1070 | } |
1071 | return *FI; |
1072 | } |
1073 | |
1074 | /// Initialize the function information cache \p FI for the function \p F. |
1075 | /// |
1076 | /// This method needs to be called for all function that might be looked at |
1077 | /// through the information cache interface *prior* to looking at them. |
1078 | void initializeInformationCache(const Function &F, FunctionInfo &FI); |
1079 | |
1080 | /// The datalayout used in the module. |
1081 | const DataLayout &DL; |
1082 | |
1083 | /// The allocator used to allocate memory, e.g. for `FunctionInfo`s. |
1084 | BumpPtrAllocator &Allocator; |
1085 | |
1086 | /// MustBeExecutedContextExplorer |
1087 | MustBeExecutedContextExplorer Explorer; |
1088 | |
1089 | /// A map with knowledge retained in `llvm.assume` instructions. |
1090 | RetainedKnowledgeMap KnowledgeMap; |
1091 | |
1092 | /// Getters for analysis. |
1093 | AnalysisGetter &AG; |
1094 | |
1095 | /// The underlying CGSCC, or null if not available. |
1096 | SetVector<Function *> *CGSCC; |
1097 | |
1098 | /// Set of inlineable functions |
1099 | SmallPtrSet<const Function *, 8> InlineableFunctions; |
1100 | |
1101 | /// A map for caching results of queries for isPotentiallyReachable |
1102 | DenseMap<std::pair<const Instruction *, const Instruction *>, bool> |
1103 | PotentiallyReachableMap; |
1104 | |
1105 | /// The triple describing the target machine. |
1106 | Triple TargetTriple; |
1107 | |
1108 | /// Give the Attributor access to the members so |
1109 | /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. |
1110 | friend struct Attributor; |
1111 | }; |
1112 | |
1113 | /// The fixpoint analysis framework that orchestrates the attribute deduction. |
1114 | /// |
1115 | /// The Attributor provides a general abstract analysis framework (guided |
1116 | /// fixpoint iteration) as well as helper functions for the deduction of |
1117 | /// (LLVM-IR) attributes. However, also other code properties can be deduced, |
1118 | /// propagated, and ultimately manifested through the Attributor framework. This |
1119 | /// is particularly useful if these properties interact with attributes and a |
1120 | /// co-scheduled deduction allows to improve the solution. Even if not, thus if |
1121 | /// attributes/properties are completely isolated, they should use the |
1122 | /// Attributor framework to reduce the number of fixpoint iteration frameworks |
1123 | /// in the code base. Note that the Attributor design makes sure that isolated |
1124 | /// attributes are not impacted, in any way, by others derived at the same time |
1125 | /// if there is no cross-reasoning performed. |
1126 | /// |
1127 | /// The public facing interface of the Attributor is kept simple and basically |
1128 | /// allows abstract attributes to do one thing: query abstract attributes |
1129 | /// in-flight. There are two reasons to do this: |
1130 | /// a) The optimistic state of one abstract attribute can justify an |
1131 | /// optimistic state of another, allowing the framework to end up with an |
1132 | /// optimistic (=best possible) fixpoint instead of one based solely on |
1133 | /// information in the IR. |
1134 | /// b) This avoids reimplementing various kinds of lookups, e.g., to check |
1135 | /// for existing IR attributes, in favor of a single lookups interface |
1136 | /// provided by an abstract attribute subclass. |
1137 | /// |
1138 | /// NOTE: The mechanics of adding a new "concrete" abstract attribute are |
1139 | /// described in the file comment. |
1140 | struct Attributor { |
1141 | |
1142 | using OptimizationRemarkGetter = |
1143 | function_ref<OptimizationRemarkEmitter &(Function *)>; |
1144 | |
1145 | /// Constructor |
1146 | /// |
1147 | /// \param Functions The set of functions we are deriving attributes for. |
1148 | /// \param InfoCache Cache to hold various information accessible for |
1149 | /// the abstract attributes. |
1150 | /// \param CGUpdater Helper to update an underlying call graph. |
1151 | /// \param Allowed If not null, a set limiting the attribute opportunities. |
1152 | /// \param DeleteFns Whether to delete functions. |
1153 | /// \param RewriteSignatures Whether to rewrite function signatures. |
1154 | Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache, |
1155 | CallGraphUpdater &CGUpdater, |
1156 | DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true, |
1157 | bool RewriteSignatures = true) |
1158 | : Allocator(InfoCache.Allocator), Functions(Functions), |
1159 | InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), |
1160 | DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), |
1161 | MaxFixpointIterations(None), OREGetter(None), PassName("") {} |
1162 | |
1163 | /// Constructor |
1164 | /// |
1165 | /// \param Functions The set of functions we are deriving attributes for. |
1166 | /// \param InfoCache Cache to hold various information accessible for |
1167 | /// the abstract attributes. |
1168 | /// \param CGUpdater Helper to update an underlying call graph. |
1169 | /// \param Allowed If not null, a set limiting the attribute opportunities. |
1170 | /// \param DeleteFns Whether to delete functions |
1171 | /// \param RewriteSignatures Whether to rewrite function signatures. |
1172 | /// \param MaxFixpointIterations Maximum number of iterations to run until |
1173 | /// fixpoint. |
1174 | /// \param OREGetter A callback function that returns an ORE object from a |
1175 | /// Function pointer. |
1176 | /// \param PassName The name of the pass emitting remarks. |
1177 | Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache, |
1178 | CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed, |
1179 | bool DeleteFns, bool RewriteSignatures, |
1180 | Optional<unsigned> MaxFixpointIterations, |
1181 | OptimizationRemarkGetter OREGetter, const char *PassName) |
1182 | : Allocator(InfoCache.Allocator), Functions(Functions), |
1183 | InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), |
1184 | DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), |
1185 | MaxFixpointIterations(MaxFixpointIterations), |
1186 | OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)), |
1187 | PassName(PassName) {} |
1188 | |
1189 | ~Attributor(); |
1190 | |
1191 | /// Run the analyses until a fixpoint is reached or enforced (timeout). |
1192 | /// |
1193 | /// The attributes registered with this Attributor can be used after as long |
1194 | /// as the Attributor is not destroyed (it owns the attributes now). |
1195 | /// |
1196 | /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED. |
1197 | ChangeStatus run(); |
1198 | |
1199 | /// Lookup an abstract attribute of type \p AAType at position \p IRP. While |
1200 | /// no abstract attribute is found equivalent positions are checked, see |
1201 | /// SubsumingPositionIterator. Thus, the returned abstract attribute |
1202 | /// might be anchored at a different position, e.g., the callee if \p IRP is a |
1203 | /// call base. |
1204 | /// |
1205 | /// This method is the only (supported) way an abstract attribute can retrieve |
1206 | /// information from another abstract attribute. As an example, take an |
1207 | /// abstract attribute that determines the memory access behavior for a |
1208 | /// argument (readnone, readonly, ...). It should use `getAAFor` to get the |
1209 | /// most optimistic information for other abstract attributes in-flight, e.g. |
1210 | /// the one reasoning about the "captured" state for the argument or the one |
1211 | /// reasoning on the memory access behavior of the function as a whole. |
1212 | /// |
1213 | /// If the DepClass enum is set to `DepClassTy::None` the dependence from |
1214 | /// \p QueryingAA to the return abstract attribute is not automatically |
1215 | /// recorded. This should only be used if the caller will record the |
1216 | /// dependence explicitly if necessary, thus if it the returned abstract |
1217 | /// attribute is used for reasoning. To record the dependences explicitly use |
1218 | /// the `Attributor::recordDependence` method. |
1219 | template <typename AAType> |
1220 | const AAType &getAAFor(const AbstractAttribute &QueryingAA, |
1221 | const IRPosition &IRP, DepClassTy DepClass) { |
1222 | return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass, |
1223 | /* ForceUpdate */ false); |
1224 | } |
1225 | |
1226 | /// Similar to getAAFor but the return abstract attribute will be updated (via |
1227 | /// `AbstractAttribute::update`) even if it is found in the cache. This is |
1228 | /// especially useful for AAIsDead as changes in liveness can make updates |
1229 | /// possible/useful that were not happening before as the abstract attribute |
1230 | /// was assumed dead. |
1231 | template <typename AAType> |
1232 | const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA, |
1233 | const IRPosition &IRP, DepClassTy DepClass) { |
1234 | return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass, |
1235 | /* ForceUpdate */ true); |
1236 | } |
1237 | |
1238 | /// The version of getAAFor that allows to omit a querying abstract |
1239 | /// attribute. Using this after Attributor started running is restricted to |
1240 | /// only the Attributor itself. Initial seeding of AAs can be done via this |
1241 | /// function. |
1242 | /// NOTE: ForceUpdate is ignored in any stage other than the update stage. |
1243 | template <typename AAType> |
1244 | const AAType &getOrCreateAAFor(IRPosition IRP, |
1245 | const AbstractAttribute *QueryingAA, |
1246 | DepClassTy DepClass, bool ForceUpdate = false, |
1247 | bool UpdateAfterInit = true) { |
1248 | if (!shouldPropagateCallBaseContext(IRP)) |
1249 | IRP = IRP.stripCallBaseContext(); |
1250 | |
1251 | if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass, |
1252 | /* AllowInvalidState */ true)) { |
1253 | if (ForceUpdate && Phase == AttributorPhase::UPDATE) |
1254 | updateAA(*AAPtr); |
1255 | return *AAPtr; |
1256 | } |
1257 | |
1258 | // No matching attribute found, create one. |
1259 | // Use the static create method. |
1260 | auto &AA = AAType::createForPosition(IRP, *this); |
1261 | |
1262 | // If we are currenty seeding attributes, enforce seeding rules. |
1263 | if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) { |
1264 | AA.getState().indicatePessimisticFixpoint(); |
1265 | return AA; |
1266 | } |
1267 | |
1268 | registerAA(AA); |
1269 | |
1270 | // For now we ignore naked and optnone functions. |
1271 | bool Invalidate = Allowed && !Allowed->count(&AAType::ID); |
1272 | const Function *FnScope = IRP.getAnchorScope(); |
1273 | if (FnScope) |
1274 | Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || |
1275 | FnScope->hasFnAttribute(Attribute::OptimizeNone); |
1276 | |
1277 | // Avoid too many nested initializations to prevent a stack overflow. |
1278 | Invalidate |= InitializationChainLength > MaxInitializationChainLength; |
1279 | |
1280 | // Bootstrap the new attribute with an initial update to propagate |
1281 | // information, e.g., function -> call site. If it is not on a given |
1282 | // Allowed we will not perform updates at all. |
1283 | if (Invalidate) { |
1284 | AA.getState().indicatePessimisticFixpoint(); |
1285 | return AA; |
1286 | } |
1287 | |
1288 | { |
1289 | TimeTraceScope TimeScope(AA.getName() + "::initialize"); |
1290 | ++InitializationChainLength; |
1291 | AA.initialize(*this); |
1292 | --InitializationChainLength; |
1293 | } |
1294 | |
1295 | // Initialize and update is allowed for code outside of the current function |
1296 | // set, but only if it is part of module slice we are allowed to look at. |
1297 | // Only exception is AAIsDeadFunction whose initialization is prevented |
1298 | // directly, since we don't to compute it twice. |
1299 | if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) { |
1300 | if (!getInfoCache().isInModuleSlice(*FnScope)) { |
1301 | AA.getState().indicatePessimisticFixpoint(); |
1302 | return AA; |
1303 | } |
1304 | } |
1305 | |
1306 | // If this is queried in the manifest stage, we force the AA to indicate |
1307 | // pessimistic fixpoint immediately. |
1308 | if (Phase == AttributorPhase::MANIFEST) { |
1309 | AA.getState().indicatePessimisticFixpoint(); |
1310 | return AA; |
1311 | } |
1312 | |
1313 | // Allow seeded attributes to declare dependencies. |
1314 | // Remember the seeding state. |
1315 | if (UpdateAfterInit) { |
1316 | AttributorPhase OldPhase = Phase; |
1317 | Phase = AttributorPhase::UPDATE; |
1318 | |
1319 | updateAA(AA); |
1320 | |
1321 | Phase = OldPhase; |
1322 | } |
1323 | |
1324 | if (QueryingAA && AA.getState().isValidState()) |
1325 | recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA), |
1326 | DepClass); |
1327 | return AA; |
1328 | } |
1329 | template <typename AAType> |
1330 | const AAType &getOrCreateAAFor(const IRPosition &IRP) { |
1331 | return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr, |
1332 | DepClassTy::NONE); |
1333 | } |
1334 | |
1335 | /// Return the attribute of \p AAType for \p IRP if existing and valid. This |
1336 | /// also allows non-AA users lookup. |
1337 | template <typename AAType> |
1338 | AAType *lookupAAFor(const IRPosition &IRP, |
1339 | const AbstractAttribute *QueryingAA = nullptr, |
1340 | DepClassTy DepClass = DepClassTy::OPTIONAL, |
1341 | bool AllowInvalidState = false) { |
1342 | static_assert(std::is_base_of<AbstractAttribute, AAType>::value, |
1343 | "Cannot query an attribute with a type not derived from " |
1344 | "'AbstractAttribute'!"); |
1345 | // Lookup the abstract attribute of type AAType. If found, return it after |
1346 | // registering a dependence of QueryingAA on the one returned attribute. |
1347 | AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP}); |
1348 | if (!AAPtr) |
1349 | return nullptr; |
1350 | |
1351 | AAType *AA = static_cast<AAType *>(AAPtr); |
1352 | |
1353 | // Do not register a dependence on an attribute with an invalid state. |
1354 | if (DepClass != DepClassTy::NONE && QueryingAA && |
1355 | AA->getState().isValidState()) |
1356 | recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA), |
1357 | DepClass); |
1358 | |
1359 | // Return nullptr if this attribute has an invalid state. |
1360 | if (!AllowInvalidState && !AA->getState().isValidState()) |
1361 | return nullptr; |
1362 | return AA; |
1363 | } |
1364 | |
1365 | /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if |
1366 | /// \p FromAA changes \p ToAA should be updated as well. |
1367 | /// |
1368 | /// This method should be used in conjunction with the `getAAFor` method and |
1369 | /// with the DepClass enum passed to the method set to None. This can |
1370 | /// be beneficial to avoid false dependences but it requires the users of |
1371 | /// `getAAFor` to explicitly record true dependences through this method. |
1372 | /// The \p DepClass flag indicates if the dependence is strictly necessary. |
1373 | /// That means for required dependences, if \p FromAA changes to an invalid |
1374 | /// state, \p ToAA can be moved to a pessimistic fixpoint because it required |
1375 | /// information from \p FromAA but none are available anymore. |
1376 | void recordDependence(const AbstractAttribute &FromAA, |
1377 | const AbstractAttribute &ToAA, DepClassTy DepClass); |
1378 | |
1379 | /// Introduce a new abstract attribute into the fixpoint analysis. |
1380 | /// |
1381 | /// Note that ownership of the attribute is given to the Attributor. It will |
1382 | /// invoke delete for the Attributor on destruction of the Attributor. |
1383 | /// |
1384 | /// Attributes are identified by their IR position (AAType::getIRPosition()) |
1385 | /// and the address of their static member (see AAType::ID). |
1386 | template <typename AAType> AAType &registerAA(AAType &AA) { |
1387 | static_assert(std::is_base_of<AbstractAttribute, AAType>::value, |
1388 | "Cannot register an attribute with a type not derived from " |
1389 | "'AbstractAttribute'!"); |
1390 | // Put the attribute in the lookup map structure and the container we use to |
1391 | // keep track of all attributes. |
1392 | const IRPosition &IRP = AA.getIRPosition(); |
1393 | AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}]; |
1394 | |
1395 | assert(!AAPtr && "Attribute already in map!")(static_cast <bool> (!AAPtr && "Attribute already in map!" ) ? void (0) : __assert_fail ("!AAPtr && \"Attribute already in map!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 1395, __extension__ __PRETTY_FUNCTION__)); |
1396 | AAPtr = &AA; |
1397 | |
1398 | // Register AA with the synthetic root only before the manifest stage. |
1399 | if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE) |
1400 | DG.SyntheticRoot.Deps.push_back( |
1401 | AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED))); |
1402 | |
1403 | return AA; |
1404 | } |
1405 | |
1406 | /// Return the internal information cache. |
1407 | InformationCache &getInfoCache() { return InfoCache; } |
1408 | |
1409 | /// Return true if this is a module pass, false otherwise. |
1410 | bool isModulePass() const { |
1411 | return !Functions.empty() && |
1412 | Functions.size() == Functions.front()->getParent()->size(); |
1413 | } |
1414 | |
1415 | /// Return true if we derive attributes for \p Fn |
1416 | bool isRunOn(Function &Fn) const { |
1417 | return Functions.empty() || Functions.count(&Fn); |
1418 | } |
1419 | |
1420 | /// Determine opportunities to derive 'default' attributes in \p F and create |
1421 | /// abstract attribute objects for them. |
1422 | /// |
1423 | /// \param F The function that is checked for attribute opportunities. |
1424 | /// |
1425 | /// Note that abstract attribute instances are generally created even if the |
1426 | /// IR already contains the information they would deduce. The most important |
1427 | /// reason for this is the single interface, the one of the abstract attribute |
1428 | /// instance, which can be queried without the need to look at the IR in |
1429 | /// various places. |
1430 | void identifyDefaultAbstractAttributes(Function &F); |
1431 | |
1432 | /// Determine whether the function \p F is IPO amendable |
1433 | /// |
1434 | /// If a function is exactly defined or it has alwaysinline attribute |
1435 | /// and is viable to be inlined, we say it is IPO amendable |
1436 | bool isFunctionIPOAmendable(const Function &F) { |
1437 | return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F); |
1438 | } |
1439 | |
1440 | /// Mark the internal function \p F as live. |
1441 | /// |
1442 | /// This will trigger the identification and initialization of attributes for |
1443 | /// \p F. |
1444 | void markLiveInternalFunction(const Function &F) { |
1445 | assert(F.hasLocalLinkage() &&(static_cast <bool> (F.hasLocalLinkage() && "Only local linkage is assumed dead initially." ) ? void (0) : __assert_fail ("F.hasLocalLinkage() && \"Only local linkage is assumed dead initially.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 1446, __extension__ __PRETTY_FUNCTION__)) |
1446 | "Only local linkage is assumed dead initially.")(static_cast <bool> (F.hasLocalLinkage() && "Only local linkage is assumed dead initially." ) ? void (0) : __assert_fail ("F.hasLocalLinkage() && \"Only local linkage is assumed dead initially.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 1446, __extension__ __PRETTY_FUNCTION__)); |
1447 | |
1448 | identifyDefaultAbstractAttributes(const_cast<Function &>(F)); |
1449 | } |
1450 | |
1451 | /// Helper function to remove callsite. |
1452 | void removeCallSite(CallInst *CI) { |
1453 | if (!CI) |
1454 | return; |
1455 | |
1456 | CGUpdater.removeCallSite(*CI); |
1457 | } |
1458 | |
1459 | /// Record that \p U is to be replaced with \p NV after information was |
1460 | /// manifested. This also triggers deletion of trivially dead instructions. |
1461 | bool changeUseAfterManifest(Use &U, Value &NV) { |
1462 | Value *&V = ToBeChangedUses[&U]; |
1463 | if (V && (V->stripPointerCasts() == NV.stripPointerCasts() || |
1464 | isa_and_nonnull<UndefValue>(V))) |
1465 | return false; |
1466 | assert((!V || V == &NV || isa<UndefValue>(NV)) &&(static_cast <bool> ((!V || V == &NV || isa<UndefValue >(NV)) && "Use was registered twice for replacement with different values!" ) ? void (0) : __assert_fail ("(!V || V == &NV || isa<UndefValue>(NV)) && \"Use was registered twice for replacement with different values!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 1467, __extension__ __PRETTY_FUNCTION__)) |
1467 | "Use was registered twice for replacement with different values!")(static_cast <bool> ((!V || V == &NV || isa<UndefValue >(NV)) && "Use was registered twice for replacement with different values!" ) ? void (0) : __assert_fail ("(!V || V == &NV || isa<UndefValue>(NV)) && \"Use was registered twice for replacement with different values!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 1467, __extension__ __PRETTY_FUNCTION__)); |
1468 | V = &NV; |
1469 | return true; |
1470 | } |
1471 | |
1472 | /// Helper function to replace all uses of \p V with \p NV. Return true if |
1473 | /// there is any change. The flag \p ChangeDroppable indicates if droppable |
1474 | /// uses should be changed too. |
1475 | bool changeValueAfterManifest(Value &V, Value &NV, |
1476 | bool ChangeDroppable = true) { |
1477 | auto &Entry = ToBeChangedValues[&V]; |
1478 | Value *&CurNV = Entry.first; |
1479 | if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() || |
1480 | isa<UndefValue>(CurNV))) |
1481 | return false; |
1482 | assert((!CurNV || CurNV == &NV || isa<UndefValue>(NV)) &&(static_cast <bool> ((!CurNV || CurNV == &NV || isa <UndefValue>(NV)) && "Value replacement was registered twice with different values!" ) ? void (0) : __assert_fail ("(!CurNV || CurNV == &NV || isa<UndefValue>(NV)) && \"Value replacement was registered twice with different values!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 1483, __extension__ __PRETTY_FUNCTION__)) |
1483 | "Value replacement was registered twice with different values!")(static_cast <bool> ((!CurNV || CurNV == &NV || isa <UndefValue>(NV)) && "Value replacement was registered twice with different values!" ) ? void (0) : __assert_fail ("(!CurNV || CurNV == &NV || isa<UndefValue>(NV)) && \"Value replacement was registered twice with different values!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 1483, __extension__ __PRETTY_FUNCTION__)); |
1484 | CurNV = &NV; |
1485 | Entry.second = ChangeDroppable; |
1486 | return true; |
1487 | } |
1488 | |
1489 | /// Record that \p I is to be replaced with `unreachable` after information |
1490 | /// was manifested. |
1491 | void changeToUnreachableAfterManifest(Instruction *I) { |
1492 | ToBeChangedToUnreachableInsts.insert(I); |
1493 | } |
1494 | |
1495 | /// Record that \p II has at least one dead successor block. This information |
1496 | /// is used, e.g., to replace \p II with a call, after information was |
1497 | /// manifested. |
1498 | void registerInvokeWithDeadSuccessor(InvokeInst &II) { |
1499 | InvokeWithDeadSuccessor.push_back(&II); |
1500 | } |
1501 | |
1502 | /// Record that \p I is deleted after information was manifested. This also |
1503 | /// triggers deletion of trivially dead instructions. |
1504 | void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); } |
1505 | |
1506 | /// Record that \p BB is deleted after information was manifested. This also |
1507 | /// triggers deletion of trivially dead instructions. |
1508 | void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); } |
1509 | |
1510 | // Record that \p BB is added during the manifest of an AA. Added basic blocks |
1511 | // are preserved in the IR. |
1512 | void registerManifestAddedBasicBlock(BasicBlock &BB) { |
1513 | ManifestAddedBlocks.insert(&BB); |
1514 | } |
1515 | |
1516 | /// Record that \p F is deleted after information was manifested. |
1517 | void deleteAfterManifest(Function &F) { |
1518 | if (DeleteFns) |
1519 | ToBeDeletedFunctions.insert(&F); |
1520 | } |
1521 | |
1522 | /// If \p IRP is assumed to be a constant, return it, if it is unclear yet, |
1523 | /// return None, otherwise return `nullptr`. |
1524 | Optional<Constant *> getAssumedConstant(const IRPosition &IRP, |
1525 | const AbstractAttribute &AA, |
1526 | bool &UsedAssumedInformation); |
1527 | Optional<Constant *> getAssumedConstant(const Value &V, |
1528 | const AbstractAttribute &AA, |
1529 | bool &UsedAssumedInformation) { |
1530 | return getAssumedConstant(IRPosition::value(V), AA, UsedAssumedInformation); |
1531 | } |
1532 | |
1533 | /// If \p V is assumed simplified, return it, if it is unclear yet, |
1534 | /// return None, otherwise return `nullptr`. |
1535 | Optional<Value *> getAssumedSimplified(const IRPosition &IRP, |
1536 | const AbstractAttribute &AA, |
1537 | bool &UsedAssumedInformation) { |
1538 | return getAssumedSimplified(IRP, &AA, UsedAssumedInformation); |
1539 | } |
1540 | Optional<Value *> getAssumedSimplified(const Value &V, |
1541 | const AbstractAttribute &AA, |
1542 | bool &UsedAssumedInformation) { |
1543 | return getAssumedSimplified(IRPosition::value(V), AA, |
1544 | UsedAssumedInformation); |
1545 | } |
1546 | |
1547 | /// If \p V is assumed simplified, return it, if it is unclear yet, |
1548 | /// return None, otherwise return `nullptr`. Same as the public version |
1549 | /// except that it can be used without recording dependences on any \p AA. |
1550 | Optional<Value *> getAssumedSimplified(const IRPosition &V, |
1551 | const AbstractAttribute *AA, |
1552 | bool &UsedAssumedInformation); |
1553 | |
1554 | /// Register \p CB as a simplification callback. |
1555 | /// `Attributor::getAssumedSimplified` will use these callbacks before |
1556 | /// it asks `AAValueSimplify`. It is important to ensure this |
1557 | /// is called before `identifyDefaultAbstractAttributes`, assuming the |
1558 | /// latter is called at all. |
1559 | using SimplifictionCallbackTy = std::function<Optional<Value *>( |
1560 | const IRPosition &, const AbstractAttribute *, bool &)>; |
1561 | void registerSimplificationCallback(const IRPosition &IRP, |
1562 | const SimplifictionCallbackTy &CB) { |
1563 | SimplificationCallbacks[IRP].emplace_back(CB); |
1564 | } |
1565 | |
1566 | /// Return true if there is a simplification callback for \p IRP. |
1567 | bool hasSimplificationCallback(const IRPosition &IRP) { |
1568 | return SimplificationCallbacks.count(IRP); |
1569 | } |
1570 | |
1571 | private: |
1572 | /// The vector with all simplification callbacks registered by outside AAs. |
1573 | DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>> |
1574 | SimplificationCallbacks; |
1575 | |
1576 | public: |
1577 | /// Translate \p V from the callee context into the call site context. |
1578 | Optional<Value *> |
1579 | translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB, |
1580 | const AbstractAttribute &AA, |
1581 | bool &UsedAssumedInformation); |
1582 | |
1583 | /// Return true if \p AA (or its context instruction) is assumed dead. |
1584 | /// |
1585 | /// If \p LivenessAA is not provided it is queried. |
1586 | bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA, |
1587 | bool &UsedAssumedInformation, |
1588 | bool CheckBBLivenessOnly = false, |
1589 | DepClassTy DepClass = DepClassTy::OPTIONAL); |
1590 | |
1591 | /// Return true if \p I is assumed dead. |
1592 | /// |
1593 | /// If \p LivenessAA is not provided it is queried. |
1594 | bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA, |
1595 | const AAIsDead *LivenessAA, bool &UsedAssumedInformation, |
1596 | bool CheckBBLivenessOnly = false, |
1597 | DepClassTy DepClass = DepClassTy::OPTIONAL); |
1598 | |
1599 | /// Return true if \p U is assumed dead. |
1600 | /// |
1601 | /// If \p FnLivenessAA is not provided it is queried. |
1602 | bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA, |
1603 | const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation, |
1604 | bool CheckBBLivenessOnly = false, |
1605 | DepClassTy DepClass = DepClassTy::OPTIONAL); |
1606 | |
1607 | /// Return true if \p IRP is assumed dead. |
1608 | /// |
1609 | /// If \p FnLivenessAA is not provided it is queried. |
1610 | bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA, |
1611 | const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation, |
1612 | bool CheckBBLivenessOnly = false, |
1613 | DepClassTy DepClass = DepClassTy::OPTIONAL); |
1614 | |
1615 | /// Return true if \p BB is assumed dead. |
1616 | /// |
1617 | /// If \p LivenessAA is not provided it is queried. |
1618 | bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA, |
1619 | const AAIsDead *FnLivenessAA, |
1620 | DepClassTy DepClass = DepClassTy::OPTIONAL); |
1621 | |
1622 | /// Check \p Pred on all (transitive) uses of \p V. |
1623 | /// |
1624 | /// This method will evaluate \p Pred on all (transitive) uses of the |
1625 | /// associated value and return true if \p Pred holds every time. |
1626 | bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred, |
1627 | const AbstractAttribute &QueryingAA, const Value &V, |
1628 | bool CheckBBLivenessOnly = false, |
1629 | DepClassTy LivenessDepClass = DepClassTy::OPTIONAL); |
1630 | |
1631 | /// Emit a remark generically. |
1632 | /// |
1633 | /// This template function can be used to generically emit a remark. The |
1634 | /// RemarkKind should be one of the following: |
1635 | /// - OptimizationRemark to indicate a successful optimization attempt |
1636 | /// - OptimizationRemarkMissed to report a failed optimization attempt |
1637 | /// - OptimizationRemarkAnalysis to provide additional information about an |
1638 | /// optimization attempt |
1639 | /// |
1640 | /// The remark is built using a callback function \p RemarkCB that takes a |
1641 | /// RemarkKind as input and returns a RemarkKind. |
1642 | template <typename RemarkKind, typename RemarkCallBack> |
1643 | void emitRemark(Instruction *I, StringRef RemarkName, |
1644 | RemarkCallBack &&RemarkCB) const { |
1645 | if (!OREGetter) |
1646 | return; |
1647 | |
1648 | Function *F = I->getFunction(); |
1649 | auto &ORE = OREGetter.getValue()(F); |
1650 | |
1651 | if (RemarkName.startswith("OMP")) |
1652 | ORE.emit([&]() { |
1653 | return RemarkCB(RemarkKind(PassName, RemarkName, I)) |
1654 | << " [" << RemarkName << "]"; |
1655 | }); |
1656 | else |
1657 | ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); }); |
1658 | } |
1659 | |
1660 | /// Emit a remark on a function. |
1661 | template <typename RemarkKind, typename RemarkCallBack> |
1662 | void emitRemark(Function *F, StringRef RemarkName, |
1663 | RemarkCallBack &&RemarkCB) const { |
1664 | if (!OREGetter) |
1665 | return; |
1666 | |
1667 | auto &ORE = OREGetter.getValue()(F); |
1668 | |
1669 | if (RemarkName.startswith("OMP")) |
1670 | ORE.emit([&]() { |
1671 | return RemarkCB(RemarkKind(PassName, RemarkName, F)) |
1672 | << " [" << RemarkName << "]"; |
1673 | }); |
1674 | else |
1675 | ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); }); |
1676 | } |
1677 | |
1678 | /// Helper struct used in the communication between an abstract attribute (AA) |
1679 | /// that wants to change the signature of a function and the Attributor which |
1680 | /// applies the changes. The struct is partially initialized with the |
1681 | /// information from the AA (see the constructor). All other members are |
1682 | /// provided by the Attributor prior to invoking any callbacks. |
1683 | struct ArgumentReplacementInfo { |
1684 | /// Callee repair callback type |
1685 | /// |
1686 | /// The function repair callback is invoked once to rewire the replacement |
1687 | /// arguments in the body of the new function. The argument replacement info |
1688 | /// is passed, as built from the registerFunctionSignatureRewrite call, as |
1689 | /// well as the replacement function and an iterator to the first |
1690 | /// replacement argument. |
1691 | using CalleeRepairCBTy = std::function<void( |
1692 | const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>; |
1693 | |
1694 | /// Abstract call site (ACS) repair callback type |
1695 | /// |
1696 | /// The abstract call site repair callback is invoked once on every abstract |
1697 | /// call site of the replaced function (\see ReplacedFn). The callback needs |
1698 | /// to provide the operands for the call to the new replacement function. |
1699 | /// The number and type of the operands appended to the provided vector |
1700 | /// (second argument) is defined by the number and types determined through |
1701 | /// the replacement type vector (\see ReplacementTypes). The first argument |
1702 | /// is the ArgumentReplacementInfo object registered with the Attributor |
1703 | /// through the registerFunctionSignatureRewrite call. |
1704 | using ACSRepairCBTy = |
1705 | std::function<void(const ArgumentReplacementInfo &, AbstractCallSite, |
1706 | SmallVectorImpl<Value *> &)>; |
1707 | |
1708 | /// Simple getters, see the corresponding members for details. |
1709 | ///{ |
1710 | |
1711 | Attributor &getAttributor() const { return A; } |
1712 | const Function &getReplacedFn() const { return ReplacedFn; } |
1713 | const Argument &getReplacedArg() const { return ReplacedArg; } |
1714 | unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); } |
1715 | const SmallVectorImpl<Type *> &getReplacementTypes() const { |
1716 | return ReplacementTypes; |
1717 | } |
1718 | |
1719 | ///} |
1720 | |
1721 | private: |
1722 | /// Constructor that takes the argument to be replaced, the types of |
1723 | /// the replacement arguments, as well as callbacks to repair the call sites |
1724 | /// and new function after the replacement happened. |
1725 | ArgumentReplacementInfo(Attributor &A, Argument &Arg, |
1726 | ArrayRef<Type *> ReplacementTypes, |
1727 | CalleeRepairCBTy &&CalleeRepairCB, |
1728 | ACSRepairCBTy &&ACSRepairCB) |
1729 | : A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg), |
1730 | ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()), |
1731 | CalleeRepairCB(std::move(CalleeRepairCB)), |
1732 | ACSRepairCB(std::move(ACSRepairCB)) {} |
1733 | |
1734 | /// Reference to the attributor to allow access from the callbacks. |
1735 | Attributor &A; |
1736 | |
1737 | /// The "old" function replaced by ReplacementFn. |
1738 | const Function &ReplacedFn; |
1739 | |
1740 | /// The "old" argument replaced by new ones defined via ReplacementTypes. |
1741 | const Argument &ReplacedArg; |
1742 | |
1743 | /// The types of the arguments replacing ReplacedArg. |
1744 | const SmallVector<Type *, 8> ReplacementTypes; |
1745 | |
1746 | /// Callee repair callback, see CalleeRepairCBTy. |
1747 | const CalleeRepairCBTy CalleeRepairCB; |
1748 | |
1749 | /// Abstract call site (ACS) repair callback, see ACSRepairCBTy. |
1750 | const ACSRepairCBTy ACSRepairCB; |
1751 | |
1752 | /// Allow access to the private members from the Attributor. |
1753 | friend struct Attributor; |
1754 | }; |
1755 | |
1756 | /// Check if we can rewrite a function signature. |
1757 | /// |
1758 | /// The argument \p Arg is replaced with new ones defined by the number, |
1759 | /// order, and types in \p ReplacementTypes. |
1760 | /// |
1761 | /// \returns True, if the replacement can be registered, via |
1762 | /// registerFunctionSignatureRewrite, false otherwise. |
1763 | bool isValidFunctionSignatureRewrite(Argument &Arg, |
1764 | ArrayRef<Type *> ReplacementTypes); |
1765 | |
1766 | /// Register a rewrite for a function signature. |
1767 | /// |
1768 | /// The argument \p Arg is replaced with new ones defined by the number, |
1769 | /// order, and types in \p ReplacementTypes. The rewiring at the call sites is |
1770 | /// done through \p ACSRepairCB and at the callee site through |
1771 | /// \p CalleeRepairCB. |
1772 | /// |
1773 | /// \returns True, if the replacement was registered, false otherwise. |
1774 | bool registerFunctionSignatureRewrite( |
1775 | Argument &Arg, ArrayRef<Type *> ReplacementTypes, |
1776 | ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB, |
1777 | ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB); |
1778 | |
1779 | /// Check \p Pred on all function call sites. |
1780 | /// |
1781 | /// This method will evaluate \p Pred on call sites and return |
1782 | /// true if \p Pred holds in every call site. However, this is only possible |
1783 | /// if all call sites are known, hence the function has internal linkage. |
1784 | /// If true is returned, \p AllCallSitesKnown is set if all possible call |
1785 | /// sites of the function have been visited. |
1786 | bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, |
1787 | const AbstractAttribute &QueryingAA, |
1788 | bool RequireAllCallSites, bool &AllCallSitesKnown); |
1789 | |
1790 | /// Check \p Pred on all values potentially returned by \p F. |
1791 | /// |
1792 | /// This method will evaluate \p Pred on all values potentially returned by |
1793 | /// the function associated with \p QueryingAA. The returned values are |
1794 | /// matched with their respective return instructions. Returns true if \p Pred |
1795 | /// holds on all of them. |
1796 | bool checkForAllReturnedValuesAndReturnInsts( |
1797 | function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred, |
1798 | const AbstractAttribute &QueryingAA); |
1799 | |
1800 | /// Check \p Pred on all values potentially returned by the function |
1801 | /// associated with \p QueryingAA. |
1802 | /// |
1803 | /// This is the context insensitive version of the method above. |
1804 | bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred, |
1805 | const AbstractAttribute &QueryingAA); |
1806 | |
1807 | /// Check \p Pred on all instructions with an opcode present in \p Opcodes. |
1808 | /// |
1809 | /// This method will evaluate \p Pred on all instructions with an opcode |
1810 | /// present in \p Opcode and return true if \p Pred holds on all of them. |
1811 | bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred, |
1812 | const AbstractAttribute &QueryingAA, |
1813 | const ArrayRef<unsigned> &Opcodes, |
1814 | bool &UsedAssumedInformation, |
1815 | bool CheckBBLivenessOnly = false, |
1816 | bool CheckPotentiallyDead = false); |
1817 | |
1818 | /// Check \p Pred on all call-like instructions (=CallBase derived). |
1819 | /// |
1820 | /// See checkForAllInstructions(...) for more information. |
1821 | bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred, |
1822 | const AbstractAttribute &QueryingAA, |
1823 | bool &UsedAssumedInformation, |
1824 | bool CheckBBLivenessOnly = false, |
1825 | bool CheckPotentiallyDead = false) { |
1826 | return checkForAllInstructions( |
1827 | Pred, QueryingAA, |
1828 | {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, |
1829 | (unsigned)Instruction::Call}, |
1830 | UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead); |
1831 | } |
1832 | |
1833 | /// Check \p Pred on all Read/Write instructions. |
1834 | /// |
1835 | /// This method will evaluate \p Pred on all instructions that read or write |
1836 | /// to memory present in the information cache and return true if \p Pred |
1837 | /// holds on all of them. |
1838 | bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred, |
1839 | AbstractAttribute &QueryingAA, |
1840 | bool &UsedAssumedInformation); |
1841 | |
1842 | /// Create a shallow wrapper for \p F such that \p F has internal linkage |
1843 | /// afterwards. It also sets the original \p F 's name to anonymous |
1844 | /// |
1845 | /// A wrapper is a function with the same type (and attributes) as \p F |
1846 | /// that will only call \p F and return the result, if any. |
1847 | /// |
1848 | /// Assuming the declaration of \p F looks like: |
1849 | /// rty F(aty0 arg0, ..., atyN argN); |
1850 | /// |
1851 | /// The wrapper will then look as follows: |
1852 | /// rty wrapper(aty0 arg0, ..., atyN argN) { |
1853 | /// return F(arg0, ..., argN); |
1854 | /// } |
1855 | /// |
1856 | static void createShallowWrapper(Function &F); |
1857 | |
1858 | /// Returns true if the function \p F can be internalized. i.e. it has a |
1859 | /// compatible linkage. |
1860 | static bool isInternalizable(Function &F); |
1861 | |
1862 | /// Make another copy of the function \p F such that the copied version has |
1863 | /// internal linkage afterwards and can be analysed. Then we replace all uses |
1864 | /// of the original function to the copied one |
1865 | /// |
1866 | /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` |
1867 | /// linkage can be internalized because these linkages guarantee that other |
1868 | /// definitions with the same name have the same semantics as this one. |
1869 | /// |
1870 | /// This will only be run if the `attributor-allow-deep-wrappers` option is |
1871 | /// set, or if the function is called with \p Force set to true. |
1872 | /// |
1873 | /// If the function \p F failed to be internalized the return value will be a |
1874 | /// null pointer. |
1875 | static Function *internalizeFunction(Function &F, bool Force = false); |
1876 | |
1877 | /// Make copies of each function in the set \p FnSet such that the copied |
1878 | /// version has internal linkage afterwards and can be analysed. Then we |
1879 | /// replace all uses of the original function to the copied one. The map |
1880 | /// \p FnMap contains a mapping of functions to their internalized versions. |
1881 | /// |
1882 | /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` |
1883 | /// linkage can be internalized because these linkages guarantee that other |
1884 | /// definitions with the same name have the same semantics as this one. |
1885 | /// |
1886 | /// This version will internalize all the functions in the set \p FnSet at |
1887 | /// once and then replace the uses. This prevents internalized functions being |
1888 | /// called by external functions when there is an internalized version in the |
1889 | /// module. |
1890 | static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, |
1891 | DenseMap<Function *, Function *> &FnMap); |
1892 | |
1893 | /// Return the data layout associated with the anchor scope. |
1894 | const DataLayout &getDataLayout() const { return InfoCache.DL; } |
1895 | |
1896 | /// The allocator used to allocate memory, e.g. for `AbstractAttribute`s. |
1897 | BumpPtrAllocator &Allocator; |
1898 | |
1899 | private: |
1900 | /// This method will do fixpoint iteration until fixpoint or the |
1901 | /// maximum iteration count is reached. |
1902 | /// |
1903 | /// If the maximum iteration count is reached, this method will |
1904 | /// indicate pessimistic fixpoint on attributes that transitively depend |
1905 | /// on attributes that were scheduled for an update. |
1906 | void runTillFixpoint(); |
1907 | |
1908 | /// Gets called after scheduling, manifests attributes to the LLVM IR. |
1909 | ChangeStatus manifestAttributes(); |
1910 | |
1911 | /// Gets called after attributes have been manifested, cleans up the IR. |
1912 | /// Deletes dead functions, blocks and instructions. |
1913 | /// Rewrites function signatures and updates the call graph. |
1914 | ChangeStatus cleanupIR(); |
1915 | |
1916 | /// Identify internal functions that are effectively dead, thus not reachable |
1917 | /// from a live entry point. The functions are added to ToBeDeletedFunctions. |
1918 | void identifyDeadInternalFunctions(); |
1919 | |
1920 | /// Run `::update` on \p AA and track the dependences queried while doing so. |
1921 | /// Also adjust the state if we know further updates are not necessary. |
1922 | ChangeStatus updateAA(AbstractAttribute &AA); |
1923 | |
1924 | /// Remember the dependences on the top of the dependence stack such that they |
1925 | /// may trigger further updates. (\see DependenceStack) |
1926 | void rememberDependences(); |
1927 | |
1928 | /// Check \p Pred on all call sites of \p Fn. |
1929 | /// |
1930 | /// This method will evaluate \p Pred on call sites and return |
1931 | /// true if \p Pred holds for every call site. However, this is only possible |
1932 | /// if all call sites are known, hence the function has internal linkage. |
1933 | /// If true is returned, \p AllCallSitesKnown is set if all possible call |
1934 | /// sites of the function have been visited. |
1935 | bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, |
1936 | const Function &Fn, bool RequireAllCallSites, |
1937 | const AbstractAttribute *QueryingAA, |
1938 | bool &AllCallSitesKnown); |
1939 | |
1940 | /// Determine if CallBase context in \p IRP should be propagated. |
1941 | bool shouldPropagateCallBaseContext(const IRPosition &IRP); |
1942 | |
1943 | /// Apply all requested function signature rewrites |
1944 | /// (\see registerFunctionSignatureRewrite) and return Changed if the module |
1945 | /// was altered. |
1946 | ChangeStatus |
1947 | rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns); |
1948 | |
1949 | /// Check if the Attribute \p AA should be seeded. |
1950 | /// See getOrCreateAAFor. |
1951 | bool shouldSeedAttribute(AbstractAttribute &AA); |
1952 | |
1953 | /// A nested map to lookup abstract attributes based on the argument position |
1954 | /// on the outer level, and the addresses of the static member (AAType::ID) on |
1955 | /// the inner level. |
1956 | ///{ |
1957 | using AAMapKeyTy = std::pair<const char *, IRPosition>; |
1958 | DenseMap<AAMapKeyTy, AbstractAttribute *> AAMap; |
1959 | ///} |
1960 | |
1961 | /// Map to remember all requested signature changes (= argument replacements). |
1962 | DenseMap<Function *, SmallVector<std::unique_ptr<ArgumentReplacementInfo>, 8>> |
1963 | ArgumentReplacementMap; |
1964 | |
1965 | /// The set of functions we are deriving attributes for. |
1966 | SetVector<Function *> &Functions; |
1967 | |
1968 | /// The information cache that holds pre-processed (LLVM-IR) information. |
1969 | InformationCache &InfoCache; |
1970 | |
1971 | /// Helper to update an underlying call graph. |
1972 | CallGraphUpdater &CGUpdater; |
1973 | |
1974 | /// Abstract Attribute dependency graph |
1975 | AADepGraph DG; |
1976 | |
1977 | /// Set of functions for which we modified the content such that it might |
1978 | /// impact the call graph. |
1979 | SmallPtrSet<Function *, 8> CGModifiedFunctions; |
1980 | |
1981 | /// Information about a dependence. If FromAA is changed ToAA needs to be |
1982 | /// updated as well. |
1983 | struct DepInfo { |
1984 | const AbstractAttribute *FromAA; |
1985 | const AbstractAttribute *ToAA; |
1986 | DepClassTy DepClass; |
1987 | }; |
1988 | |
1989 | /// The dependence stack is used to track dependences during an |
1990 | /// `AbstractAttribute::update` call. As `AbstractAttribute::update` can be |
1991 | /// recursive we might have multiple vectors of dependences in here. The stack |
1992 | /// size, should be adjusted according to the expected recursion depth and the |
1993 | /// inner dependence vector size to the expected number of dependences per |
1994 | /// abstract attribute. Since the inner vectors are actually allocated on the |
1995 | /// stack we can be generous with their size. |
1996 | using DependenceVector = SmallVector<DepInfo, 8>; |
1997 | SmallVector<DependenceVector *, 16> DependenceStack; |
1998 | |
1999 | /// If not null, a set limiting the attribute opportunities. |
2000 | const DenseSet<const char *> *Allowed; |
2001 | |
2002 | /// Whether to delete functions. |
2003 | const bool DeleteFns; |
2004 | |
2005 | /// Whether to rewrite signatures. |
2006 | const bool RewriteSignatures; |
2007 | |
2008 | /// Maximum number of fixedpoint iterations. |
2009 | Optional<unsigned> MaxFixpointIterations; |
2010 | |
2011 | /// A set to remember the functions we already assume to be live and visited. |
2012 | DenseSet<const Function *> VisitedFunctions; |
2013 | |
2014 | /// Uses we replace with a new value after manifest is done. We will then |
2015 | /// remove trivially dead instructions as well. |
2016 | DenseMap<Use *, Value *> ToBeChangedUses; |
2017 | |
2018 | /// Values we replace with a new value after manifest is done. We will then |
2019 | /// remove trivially dead instructions as well. |
2020 | DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues; |
2021 | |
2022 | /// Instructions we replace with `unreachable` insts after manifest is done. |
2023 | SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts; |
2024 | |
2025 | /// Invoke instructions with at least a single dead successor block. |
2026 | SmallVector<WeakVH, 16> InvokeWithDeadSuccessor; |
2027 | |
/// The stages the process goes through.
enum class AttributorPhase {
  SEEDING,
  UPDATE,
  MANIFEST,
  CLEANUP,
};

/// A flag that indicates which stage of the process we are in. Initially, the
/// phase is SEEDING. The phase is changed in `Attributor::run()`.
AttributorPhase Phase = AttributorPhase::SEEDING;
2036 | |
2037 | /// The current initialization chain length. Tracked to avoid stack overflows. |
2038 | unsigned InitializationChainLength = 0; |
2039 | |
2040 | /// Functions, blocks, and instructions we delete after manifest is done. |
2041 | /// |
2042 | ///{ |
2043 | SmallPtrSet<Function *, 8> ToBeDeletedFunctions; |
2044 | SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks; |
2045 | SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks; |
2046 | SmallDenseSet<WeakVH, 8> ToBeDeletedInsts; |
2047 | ///} |
2048 | |
2049 | /// Callback to get an OptimizationRemarkEmitter from a Function *. |
2050 | Optional<OptimizationRemarkGetter> OREGetter; |
2051 | |
2052 | /// The name of the pass to emit remarks for. |
2053 | const char *PassName = ""; |
2054 | |
2055 | friend AADepGraph; |
2056 | friend AttributorCallGraph; |
2057 | }; |
2058 | |
2059 | /// An interface to query the internal state of an abstract attribute. |
2060 | /// |
2061 | /// The abstract state is a minimal interface that allows the Attributor to |
2062 | /// communicate with the abstract attributes about their internal state without |
2063 | /// enforcing or exposing implementation details, e.g., the (existence of an) |
2064 | /// underlying lattice. |
2065 | /// |
2066 | /// It is sufficient to be able to query if a state is (1) valid or invalid, (2) |
2067 | /// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint |
2068 | /// was reached or (4) a pessimistic fixpoint was enforced. |
2069 | /// |
2070 | /// All methods need to be implemented by the subclass. For the common use case, |
2071 | /// a single boolean state or a bit-encoded state, the BooleanState and |
2072 | /// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract |
2073 | /// attribute can inherit from them to get the abstract state interface and |
2074 | /// additional methods to directly modify the state based if needed. See the |
2075 | /// class comments for help. |
2076 | struct AbstractState { |
2077 | virtual ~AbstractState() {} |
2078 | |
2079 | /// Return if this abstract state is in a valid state. If false, no |
2080 | /// information provided should be used. |
2081 | virtual bool isValidState() const = 0; |
2082 | |
2083 | /// Return if this abstract state is fixed, thus does not need to be updated |
2084 | /// if information changes as it cannot change itself. |
2085 | virtual bool isAtFixpoint() const = 0; |
2086 | |
2087 | /// Indicate that the abstract state should converge to the optimistic state. |
2088 | /// |
2089 | /// This will usually make the optimistically assumed state the known to be |
2090 | /// true state. |
2091 | /// |
2092 | /// \returns ChangeStatus::UNCHANGED as the assumed value should not change. |
2093 | virtual ChangeStatus indicateOptimisticFixpoint() = 0; |
2094 | |
2095 | /// Indicate that the abstract state should converge to the pessimistic state. |
2096 | /// |
2097 | /// This will usually revert the optimistically assumed state to the known to |
2098 | /// be true state. |
2099 | /// |
2100 | /// \returns ChangeStatus::CHANGED as the assumed value may change. |
2101 | virtual ChangeStatus indicatePessimisticFixpoint() = 0; |
2102 | }; |
2103 | |
2104 | /// Simple state with integers encoding. |
2105 | /// |
2106 | /// The interface ensures that the assumed bits are always a subset of the known |
2107 | /// bits. Users can only add known bits and, except through adding known bits, |
2108 | /// they can only remove assumed bits. This should guarantee monotoniticy and |
2109 | /// thereby the existence of a fixpoint (if used corretly). The fixpoint is |
2110 | /// reached when the assumed and known state/bits are equal. Users can |
2111 | /// force/inidicate a fixpoint. If an optimistic one is indicated, the known |
2112 | /// state will catch up with the assumed one, for a pessimistic fixpoint it is |
2113 | /// the other way around. |
2114 | template <typename base_ty, base_ty BestState, base_ty WorstState> |
2115 | struct IntegerStateBase : public AbstractState { |
2116 | using base_t = base_ty; |
2117 | |
2118 | IntegerStateBase() {} |
2119 | IntegerStateBase(base_t Assumed) : Assumed(Assumed) {} |
2120 | |
2121 | /// Return the best possible representable state. |
2122 | static constexpr base_t getBestState() { return BestState; } |
2123 | static constexpr base_t getBestState(const IntegerStateBase &) { |
2124 | return getBestState(); |
2125 | } |
2126 | |
2127 | /// Return the worst possible representable state. |
2128 | static constexpr base_t getWorstState() { return WorstState; } |
2129 | static constexpr base_t getWorstState(const IntegerStateBase &) { |
2130 | return getWorstState(); |
2131 | } |
2132 | |
2133 | /// See AbstractState::isValidState() |
2134 | /// NOTE: For now we simply pretend that the worst possible state is invalid. |
2135 | bool isValidState() const override { return Assumed != getWorstState(); } |
2136 | |
2137 | /// See AbstractState::isAtFixpoint() |
2138 | bool isAtFixpoint() const override { return Assumed == Known; } |
2139 | |
2140 | /// See AbstractState::indicateOptimisticFixpoint(...) |
2141 | ChangeStatus indicateOptimisticFixpoint() override { |
2142 | Known = Assumed; |
2143 | return ChangeStatus::UNCHANGED; |
2144 | } |
2145 | |
2146 | /// See AbstractState::indicatePessimisticFixpoint(...) |
2147 | ChangeStatus indicatePessimisticFixpoint() override { |
2148 | Assumed = Known; |
2149 | return ChangeStatus::CHANGED; |
2150 | } |
2151 | |
2152 | /// Return the known state encoding |
2153 | base_t getKnown() const { return Known; } |
2154 | |
2155 | /// Return the assumed state encoding. |
2156 | base_t getAssumed() const { return Assumed; } |
2157 | |
2158 | /// Equality for IntegerStateBase. |
2159 | bool |
2160 | operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const { |
2161 | return this->getAssumed() == R.getAssumed() && |
2162 | this->getKnown() == R.getKnown(); |
2163 | } |
2164 | |
2165 | /// Inequality for IntegerStateBase. |
2166 | bool |
2167 | operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const { |
2168 | return !(*this == R); |
2169 | } |
2170 | |
2171 | /// "Clamp" this state with \p R. The result is subtype dependent but it is |
2172 | /// intended that only information assumed in both states will be assumed in |
2173 | /// this one afterwards. |
2174 | void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) { |
2175 | handleNewAssumedValue(R.getAssumed()); |
2176 | } |
2177 | |
2178 | /// "Clamp" this state with \p R. The result is subtype dependent but it is |
2179 | /// intended that information known in either state will be known in |
2180 | /// this one afterwards. |
2181 | void operator+=(const IntegerStateBase<base_t, BestState, WorstState> &R) { |
2182 | handleNewKnownValue(R.getKnown()); |
2183 | } |
2184 | |
2185 | void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) { |
2186 | joinOR(R.getAssumed(), R.getKnown()); |
2187 | } |
2188 | |
2189 | void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) { |
2190 | joinAND(R.getAssumed(), R.getKnown()); |
2191 | } |
2192 | |
2193 | protected: |
2194 | /// Handle a new assumed value \p Value. Subtype dependent. |
2195 | virtual void handleNewAssumedValue(base_t Value) = 0; |
2196 | |
2197 | /// Handle a new known value \p Value. Subtype dependent. |
2198 | virtual void handleNewKnownValue(base_t Value) = 0; |
2199 | |
2200 | /// Handle a value \p Value. Subtype dependent. |
2201 | virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0; |
2202 | |
2203 | /// Handle a new assumed value \p Value. Subtype dependent. |
2204 | virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0; |
2205 | |
2206 | /// The known state encoding in an integer of type base_t. |
2207 | base_t Known = getWorstState(); |
2208 | |
2209 | /// The assumed state encoding in an integer of type base_t. |
2210 | base_t Assumed = getBestState(); |
2211 | }; |
2212 | |
2213 | /// Specialization of the integer state for a bit-wise encoding. |
2214 | template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0), |
2215 | base_ty WorstState = 0> |
2216 | struct BitIntegerState |
2217 | : public IntegerStateBase<base_ty, BestState, WorstState> { |
2218 | using base_t = base_ty; |
2219 | |
2220 | /// Return true if the bits set in \p BitsEncoding are "known bits". |
2221 | bool isKnown(base_t BitsEncoding) const { |
2222 | return (this->Known & BitsEncoding) == BitsEncoding; |
2223 | } |
2224 | |
2225 | /// Return true if the bits set in \p BitsEncoding are "assumed bits". |
2226 | bool isAssumed(base_t BitsEncoding) const { |
2227 | return (this->Assumed & BitsEncoding) == BitsEncoding; |
2228 | } |
2229 | |
2230 | /// Add the bits in \p BitsEncoding to the "known bits". |
2231 | BitIntegerState &addKnownBits(base_t Bits) { |
2232 | // Make sure we never miss any "known bits". |
2233 | this->Assumed |= Bits; |
2234 | this->Known |= Bits; |
2235 | return *this; |
2236 | } |
2237 | |
2238 | /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known. |
2239 | BitIntegerState &removeAssumedBits(base_t BitsEncoding) { |
2240 | return intersectAssumedBits(~BitsEncoding); |
2241 | } |
2242 | |
2243 | /// Remove the bits in \p BitsEncoding from the "known bits". |
2244 | BitIntegerState &removeKnownBits(base_t BitsEncoding) { |
2245 | this->Known = (this->Known & ~BitsEncoding); |
2246 | return *this; |
2247 | } |
2248 | |
2249 | /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones. |
2250 | BitIntegerState &intersectAssumedBits(base_t BitsEncoding) { |
2251 | // Make sure we never loose any "known bits". |
2252 | this->Assumed = (this->Assumed & BitsEncoding) | this->Known; |
2253 | return *this; |
2254 | } |
2255 | |
2256 | private: |
2257 | void handleNewAssumedValue(base_t Value) override { |
2258 | intersectAssumedBits(Value); |
2259 | } |
2260 | void handleNewKnownValue(base_t Value) override { addKnownBits(Value); } |
2261 | void joinOR(base_t AssumedValue, base_t KnownValue) override { |
2262 | this->Known |= KnownValue; |
2263 | this->Assumed |= AssumedValue; |
2264 | } |
2265 | void joinAND(base_t AssumedValue, base_t KnownValue) override { |
2266 | this->Known &= KnownValue; |
2267 | this->Assumed &= AssumedValue; |
2268 | } |
2269 | }; |
2270 | |
2271 | /// Specialization of the integer state for an increasing value, hence ~0u is |
2272 | /// the best state and 0 the worst. |
2273 | template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0), |
2274 | base_ty WorstState = 0> |
2275 | struct IncIntegerState |
2276 | : public IntegerStateBase<base_ty, BestState, WorstState> { |
2277 | using super = IntegerStateBase<base_ty, BestState, WorstState>; |
2278 | using base_t = base_ty; |
2279 | |
2280 | IncIntegerState() : super() {} |
2281 | IncIntegerState(base_t Assumed) : super(Assumed) {} |
2282 | |
2283 | /// Return the best possible representable state. |
2284 | static constexpr base_t getBestState() { return BestState; } |
2285 | static constexpr base_t |
2286 | getBestState(const IncIntegerState<base_ty, BestState, WorstState> &) { |
2287 | return getBestState(); |
2288 | } |
2289 | |
2290 | /// Take minimum of assumed and \p Value. |
2291 | IncIntegerState &takeAssumedMinimum(base_t Value) { |
2292 | // Make sure we never loose "known value". |
2293 | this->Assumed = std::max(std::min(this->Assumed, Value), this->Known); |
2294 | return *this; |
2295 | } |
2296 | |
2297 | /// Take maximum of known and \p Value. |
2298 | IncIntegerState &takeKnownMaximum(base_t Value) { |
2299 | // Make sure we never loose "known value". |
2300 | this->Assumed = std::max(Value, this->Assumed); |
2301 | this->Known = std::max(Value, this->Known); |
2302 | return *this; |
2303 | } |
2304 | |
2305 | private: |
2306 | void handleNewAssumedValue(base_t Value) override { |
2307 | takeAssumedMinimum(Value); |
2308 | } |
2309 | void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); } |
2310 | void joinOR(base_t AssumedValue, base_t KnownValue) override { |
2311 | this->Known = std::max(this->Known, KnownValue); |
2312 | this->Assumed = std::max(this->Assumed, AssumedValue); |
2313 | } |
2314 | void joinAND(base_t AssumedValue, base_t KnownValue) override { |
2315 | this->Known = std::min(this->Known, KnownValue); |
2316 | this->Assumed = std::min(this->Assumed, AssumedValue); |
2317 | } |
2318 | }; |
2319 | |
2320 | /// Specialization of the integer state for a decreasing value, hence 0 is the |
2321 | /// best state and ~0u the worst. |
2322 | template <typename base_ty = uint32_t> |
2323 | struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> { |
2324 | using base_t = base_ty; |
2325 | |
2326 | /// Take maximum of assumed and \p Value. |
2327 | DecIntegerState &takeAssumedMaximum(base_t Value) { |
2328 | // Make sure we never loose "known value". |
2329 | this->Assumed = std::min(std::max(this->Assumed, Value), this->Known); |
2330 | return *this; |
2331 | } |
2332 | |
2333 | /// Take minimum of known and \p Value. |
2334 | DecIntegerState &takeKnownMinimum(base_t Value) { |
2335 | // Make sure we never loose "known value". |
2336 | this->Assumed = std::min(Value, this->Assumed); |
2337 | this->Known = std::min(Value, this->Known); |
2338 | return *this; |
2339 | } |
2340 | |
2341 | private: |
2342 | void handleNewAssumedValue(base_t Value) override { |
2343 | takeAssumedMaximum(Value); |
2344 | } |
2345 | void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); } |
2346 | void joinOR(base_t AssumedValue, base_t KnownValue) override { |
2347 | this->Assumed = std::min(this->Assumed, KnownValue); |
2348 | this->Assumed = std::min(this->Assumed, AssumedValue); |
2349 | } |
2350 | void joinAND(base_t AssumedValue, base_t KnownValue) override { |
2351 | this->Assumed = std::max(this->Assumed, KnownValue); |
2352 | this->Assumed = std::max(this->Assumed, AssumedValue); |
2353 | } |
2354 | }; |
2355 | |
2356 | /// Simple wrapper for a single bit (boolean) state. |
2357 | struct BooleanState : public IntegerStateBase<bool, 1, 0> { |
2358 | using super = IntegerStateBase<bool, 1, 0>; |
2359 | using base_t = IntegerStateBase::base_t; |
2360 | |
2361 | BooleanState() : super() {} |
2362 | BooleanState(base_t Assumed) : super(Assumed) {} |
2363 | |
2364 | /// Set the assumed value to \p Value but never below the known one. |
2365 | void setAssumed(bool Value) { Assumed &= (Known | Value); } |
2366 | |
2367 | /// Set the known and asssumed value to \p Value. |
2368 | void setKnown(bool Value) { |
2369 | Known |= Value; |
2370 | Assumed |= Value; |
2371 | } |
2372 | |
2373 | /// Return true if the state is assumed to hold. |
2374 | bool isAssumed() const { return getAssumed(); } |
2375 | |
2376 | /// Return true if the state is known to hold. |
2377 | bool isKnown() const { return getKnown(); } |
2378 | |
2379 | private: |
2380 | void handleNewAssumedValue(base_t Value) override { |
2381 | if (!Value) |
2382 | Assumed = Known; |
2383 | } |
2384 | void handleNewKnownValue(base_t Value) override { |
2385 | if (Value) |
2386 | Known = (Assumed = Value); |
2387 | } |
2388 | void joinOR(base_t AssumedValue, base_t KnownValue) override { |
2389 | Known |= KnownValue; |
2390 | Assumed |= AssumedValue; |
2391 | } |
2392 | void joinAND(base_t AssumedValue, base_t KnownValue) override { |
2393 | Known &= KnownValue; |
2394 | Assumed &= AssumedValue; |
2395 | } |
2396 | }; |
2397 | |
2398 | /// State for an integer range. |
2399 | struct IntegerRangeState : public AbstractState { |
2400 | |
2401 | /// Bitwidth of the associated value. |
2402 | uint32_t BitWidth; |
2403 | |
2404 | /// State representing assumed range, initially set to empty. |
2405 | ConstantRange Assumed; |
2406 | |
2407 | /// State representing known range, initially set to [-inf, inf]. |
2408 | ConstantRange Known; |
2409 | |
2410 | IntegerRangeState(uint32_t BitWidth) |
2411 | : BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)), |
2412 | Known(ConstantRange::getFull(BitWidth)) {} |
2413 | |
2414 | IntegerRangeState(const ConstantRange &CR) |
2415 | : BitWidth(CR.getBitWidth()), Assumed(CR), |
2416 | Known(getWorstState(CR.getBitWidth())) {} |
2417 | |
2418 | /// Return the worst possible representable state. |
2419 | static ConstantRange getWorstState(uint32_t BitWidth) { |
2420 | return ConstantRange::getFull(BitWidth); |
2421 | } |
2422 | |
2423 | /// Return the best possible representable state. |
2424 | static ConstantRange getBestState(uint32_t BitWidth) { |
2425 | return ConstantRange::getEmpty(BitWidth); |
2426 | } |
2427 | static ConstantRange getBestState(const IntegerRangeState &IRS) { |
2428 | return getBestState(IRS.getBitWidth()); |
2429 | } |
2430 | |
2431 | /// Return associated values' bit width. |
2432 | uint32_t getBitWidth() const { return BitWidth; } |
2433 | |
2434 | /// See AbstractState::isValidState() |
2435 | bool isValidState() const override { |
2436 | return BitWidth > 0 && !Assumed.isFullSet(); |
2437 | } |
2438 | |
2439 | /// See AbstractState::isAtFixpoint() |
2440 | bool isAtFixpoint() const override { return Assumed == Known; } |
2441 | |
2442 | /// See AbstractState::indicateOptimisticFixpoint(...) |
2443 | ChangeStatus indicateOptimisticFixpoint() override { |
2444 | Known = Assumed; |
2445 | return ChangeStatus::CHANGED; |
2446 | } |
2447 | |
2448 | /// See AbstractState::indicatePessimisticFixpoint(...) |
2449 | ChangeStatus indicatePessimisticFixpoint() override { |
2450 | Assumed = Known; |
2451 | return ChangeStatus::CHANGED; |
2452 | } |
2453 | |
2454 | /// Return the known state encoding |
2455 | ConstantRange getKnown() const { return Known; } |
2456 | |
2457 | /// Return the assumed state encoding. |
2458 | ConstantRange getAssumed() const { return Assumed; } |
2459 | |
2460 | /// Unite assumed range with the passed state. |
2461 | void unionAssumed(const ConstantRange &R) { |
2462 | // Don't loose a known range. |
2463 | Assumed = Assumed.unionWith(R).intersectWith(Known); |
2464 | } |
2465 | |
2466 | /// See IntegerRangeState::unionAssumed(..). |
2467 | void unionAssumed(const IntegerRangeState &R) { |
2468 | unionAssumed(R.getAssumed()); |
2469 | } |
2470 | |
2471 | /// Unite known range with the passed state. |
2472 | void unionKnown(const ConstantRange &R) { |
2473 | // Don't loose a known range. |
2474 | Known = Known.unionWith(R); |
2475 | Assumed = Assumed.unionWith(Known); |
2476 | } |
2477 | |
2478 | /// See IntegerRangeState::unionKnown(..). |
2479 | void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); } |
2480 | |
2481 | /// Intersect known range with the passed state. |
2482 | void intersectKnown(const ConstantRange &R) { |
2483 | Assumed = Assumed.intersectWith(R); |
2484 | Known = Known.intersectWith(R); |
2485 | } |
2486 | |
2487 | /// See IntegerRangeState::intersectKnown(..). |
2488 | void intersectKnown(const IntegerRangeState &R) { |
2489 | intersectKnown(R.getKnown()); |
2490 | } |
2491 | |
2492 | /// Equality for IntegerRangeState. |
2493 | bool operator==(const IntegerRangeState &R) const { |
2494 | return getAssumed() == R.getAssumed() && getKnown() == R.getKnown(); |
2495 | } |
2496 | |
2497 | /// "Clamp" this state with \p R. The result is subtype dependent but it is |
2498 | /// intended that only information assumed in both states will be assumed in |
2499 | /// this one afterwards. |
2500 | IntegerRangeState operator^=(const IntegerRangeState &R) { |
2501 | // NOTE: `^=` operator seems like `intersect` but in this case, we need to |
2502 | // take `union`. |
2503 | unionAssumed(R); |
2504 | return *this; |
2505 | } |
2506 | |
2507 | IntegerRangeState operator&=(const IntegerRangeState &R) { |
2508 | // NOTE: `&=` operator seems like `intersect` but in this case, we need to |
2509 | // take `union`. |
2510 | unionKnown(R); |
2511 | unionAssumed(R); |
2512 | return *this; |
2513 | } |
2514 | }; |
2515 | |
2516 | /// Simple state for a set. |
2517 | /// |
2518 | /// This represents a state containing a set of values. The interface supports |
2519 | /// modelling sets that contain all possible elements. The state's internal |
2520 | /// value is modified using union or intersection operations. |
2521 | template <typename BaseTy> struct SetState : public AbstractState { |
2522 | /// A wrapper around a set that has semantics for handling unions and |
2523 | /// intersections with a "universal" set that contains all elements. |
2524 | struct SetContents { |
2525 | /// Creates a universal set with no concrete elements or an empty set. |
2526 | SetContents(bool Universal) : Universal(Universal) {} |
2527 | |
2528 | /// Creates a non-universal set with concrete values. |
2529 | SetContents(const DenseSet<BaseTy> &Assumptions) |
2530 | : Universal(false), Set(Assumptions) {} |
2531 | |
2532 | SetContents(bool Universal, const DenseSet<BaseTy> &Assumptions) |
2533 | : Universal(Universal), Set(Assumptions) {} |
2534 | |
2535 | const DenseSet<BaseTy> &getSet() const { return Set; } |
2536 | |
2537 | bool isUniversal() const { return Universal; } |
2538 | |
2539 | bool empty() const { return Set.empty() && !Universal; } |
2540 | |
2541 | /// Finds A := A ^ B where A or B could be the "Universal" set which |
2542 | /// contains every possible attribute. Returns true if changes were made. |
2543 | bool getIntersection(const SetContents &RHS) { |
2544 | bool IsUniversal = Universal; |
2545 | unsigned Size = Set.size(); |
2546 | |
2547 | // A := A ^ U = A |
2548 | if (RHS.isUniversal()) |
2549 | return false; |
2550 | |
2551 | // A := U ^ B = B |
2552 | if (Universal) |
2553 | Set = RHS.getSet(); |
2554 | else |
2555 | set_intersect(Set, RHS.getSet()); |
2556 | |
2557 | Universal &= RHS.isUniversal(); |
2558 | return IsUniversal != Universal || Size != Set.size(); |
2559 | } |
2560 | |
2561 | /// Finds A := A u B where A or B could be the "Universal" set which |
2562 | /// contains every possible attribute. returns true if changes were made. |
2563 | bool getUnion(const SetContents &RHS) { |
2564 | bool IsUniversal = Universal; |
2565 | unsigned Size = Set.size(); |
2566 | |
2567 | // A := A u U = U = U u B |
2568 | if (!RHS.isUniversal() && !Universal) |
2569 | set_union(Set, RHS.getSet()); |
2570 | |
2571 | Universal |= RHS.isUniversal(); |
2572 | return IsUniversal != Universal || Size != Set.size(); |
2573 | } |
2574 | |
2575 | private: |
2576 | /// Indicates if this set is "universal", containing every possible element. |
2577 | bool Universal; |
2578 | |
2579 | /// The set of currently active assumptions. |
2580 | DenseSet<BaseTy> Set; |
2581 | }; |
2582 | |
2583 | SetState() : Known(false), Assumed(true), IsAtFixedpoint(false) {} |
2584 | |
2585 | /// Initializes the known state with an initial set and initializes the |
2586 | /// assumed state as universal. |
2587 | SetState(const DenseSet<BaseTy> &Known) |
2588 | : Known(Known), Assumed(true), IsAtFixedpoint(false) {} |
2589 | |
2590 | /// See AbstractState::isValidState() |
2591 | bool isValidState() const override { return !Assumed.empty(); } |
2592 | |
2593 | /// See AbstractState::isAtFixpoint() |
2594 | bool isAtFixpoint() const override { return IsAtFixedpoint; } |
2595 | |
2596 | /// See AbstractState::indicateOptimisticFixpoint(...) |
2597 | ChangeStatus indicateOptimisticFixpoint() override { |
2598 | IsAtFixedpoint = true; |
2599 | Known = Assumed; |
2600 | return ChangeStatus::UNCHANGED; |
2601 | } |
2602 | |
2603 | /// See AbstractState::indicatePessimisticFixpoint(...) |
2604 | ChangeStatus indicatePessimisticFixpoint() override { |
2605 | IsAtFixedpoint = true; |
2606 | Assumed = Known; |
2607 | return ChangeStatus::CHANGED; |
2608 | } |
2609 | |
2610 | /// Return the known state encoding. |
2611 | const SetContents &getKnown() const { return Known; } |
2612 | |
2613 | /// Return the assumed state encoding. |
2614 | const SetContents &getAssumed() const { return Assumed; } |
2615 | |
2616 | /// Returns if the set state contains the element. |
2617 | bool setContains(const BaseTy &Elem) const { |
2618 | return Assumed.getSet().contains(Elem) || Known.getSet().contains(Elem); |
2619 | } |
2620 | |
2621 | /// Performs the set intersection between this set and \p RHS. Returns true if |
2622 | /// changes were made. |
2623 | bool getIntersection(const SetContents &RHS) { |
2624 | unsigned SizeBefore = Assumed.getSet().size(); |
2625 | |
2626 | // Get intersection and make sure that the known set is still a proper |
2627 | // subset of the assumed set. A := K u (A ^ R). |
2628 | Assumed.getIntersection(RHS); |
2629 | Assumed.getUnion(Known); |
2630 | |
2631 | return SizeBefore != Assumed.getSet().size(); |
2632 | } |
2633 | |
2634 | /// Performs the set union between this set and \p RHS. Returns true if |
2635 | /// changes were made. |
2636 | bool getUnion(const SetContents &RHS) { return Assumed.getUnion(RHS); } |
2637 | |
2638 | private: |
2639 | /// The set of values known for this state. |
2640 | SetContents Known; |
2641 | |
2642 | /// The set of assumed values for this state. |
2643 | SetContents Assumed; |
2644 | |
2645 | bool IsAtFixedpoint; |
2646 | }; |
2647 | |
2648 | /// Helper struct necessary as the modular build fails if the virtual method |
2649 | /// IRAttribute::manifest is defined in the Attributor.cpp. |
2650 | struct IRAttributeManifest { |
2651 | static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP, |
2652 | const ArrayRef<Attribute> &DeducedAttrs, |
2653 | bool ForceReplace = false); |
2654 | }; |
2655 | |
2656 | /// Helper to tie a abstract state implementation to an abstract attribute. |
2657 | template <typename StateTy, typename BaseType, class... Ts> |
2658 | struct StateWrapper : public BaseType, public StateTy { |
2659 | /// Provide static access to the type of the state. |
2660 | using StateType = StateTy; |
2661 | |
2662 | StateWrapper(const IRPosition &IRP, Ts... Args) |
2663 | : BaseType(IRP), StateTy(Args...) {} |
2664 | |
2665 | /// See AbstractAttribute::getState(...). |
2666 | StateType &getState() override { return *this; } |
2667 | |
2668 | /// See AbstractAttribute::getState(...). |
2669 | const StateType &getState() const override { return *this; } |
2670 | }; |
2671 | |
2672 | /// Helper class that provides common functionality to manifest IR attributes. |
2673 | template <Attribute::AttrKind AK, typename BaseType> |
2674 | struct IRAttribute : public BaseType { |
2675 | IRAttribute(const IRPosition &IRP) : BaseType(IRP) {} |
2676 | |
2677 | /// See AbstractAttribute::initialize(...). |
2678 | virtual void initialize(Attributor &A) override { |
2679 | const IRPosition &IRP = this->getIRPosition(); |
2680 | if (isa<UndefValue>(IRP.getAssociatedValue()) || |
2681 | this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false, |
2682 | &A)) { |
2683 | this->getState().indicateOptimisticFixpoint(); |
2684 | return; |
2685 | } |
2686 | |
2687 | bool IsFnInterface = IRP.isFnInterfaceKind(); |
2688 | const Function *FnScope = IRP.getAnchorScope(); |
2689 | // TODO: Not all attributes require an exact definition. Find a way to |
2690 | // enable deduction for some but not all attributes in case the |
2691 | // definition might be changed at runtime, see also |
2692 | // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. |
2693 | // TODO: We could always determine abstract attributes and if sufficient |
2694 | // information was found we could duplicate the functions that do not |
2695 | // have an exact definition. |
2696 | if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) |
2697 | this->getState().indicatePessimisticFixpoint(); |
2698 | } |
2699 | |
2700 | /// See AbstractAttribute::manifest(...). |
2701 | ChangeStatus manifest(Attributor &A) override { |
2702 | if (isa<UndefValue>(this->getIRPosition().getAssociatedValue())) |
2703 | return ChangeStatus::UNCHANGED; |
2704 | SmallVector<Attribute, 4> DeducedAttrs; |
2705 | getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs); |
2706 | return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(), |
2707 | DeducedAttrs); |
2708 | } |
2709 | |
2710 | /// Return the kind that identifies the abstract attribute implementation. |
2711 | Attribute::AttrKind getAttrKind() const { return AK; } |
2712 | |
2713 | /// Return the deduced attributes in \p Attrs. |
2714 | virtual void getDeducedAttributes(LLVMContext &Ctx, |
2715 | SmallVectorImpl<Attribute> &Attrs) const { |
2716 | Attrs.emplace_back(Attribute::get(Ctx, getAttrKind())); |
2717 | } |
2718 | }; |
2719 | |
2720 | /// Base struct for all "concrete attribute" deductions. |
2721 | /// |
2722 | /// The abstract attribute is a minimal interface that allows the Attributor to |
2723 | /// orchestrate the abstract/fixpoint analysis. The design allows to hide away |
2724 | /// implementation choices made for the subclasses but also to structure their |
2725 | /// implementation and simplify the use of other abstract attributes in-flight. |
2726 | /// |
2727 | /// To allow easy creation of new attributes, most methods have default |
2728 | /// implementations. The ones that do not are generally straight forward, except |
2729 | /// `AbstractAttribute::updateImpl` which is the location of most reasoning |
2730 | /// associated with the abstract attribute. The update is invoked by the |
2731 | /// Attributor in case the situation used to justify the current optimistic |
2732 | /// state might have changed. The Attributor determines this automatically |
2733 | /// by monitoring the `Attributor::getAAFor` calls made by abstract attributes. |
2734 | /// |
2735 | /// The `updateImpl` method should inspect the IR and other abstract attributes |
2736 | /// in-flight to justify the best possible (=optimistic) state. The actual |
2737 | /// implementation is, similar to the underlying abstract state encoding, not |
2738 | /// exposed. In the most common case, the `updateImpl` will go through a list of |
2739 | /// reasons why its optimistic state is valid given the current information. If |
2740 | /// any combination of them holds and is sufficient to justify the current |
2741 | /// optimistic state, the method shall return UNCHAGED. If not, the optimistic |
2742 | /// state is adjusted to the situation and the method shall return CHANGED. |
2743 | /// |
2744 | /// If the manifestation of the "concrete attribute" deduced by the subclass |
2745 | /// differs from the "default" behavior, which is a (set of) LLVM-IR |
2746 | /// attribute(s) for an argument, call site argument, function return value, or |
2747 | /// function, the `AbstractAttribute::manifest` method should be overloaded. |
2748 | /// |
2749 | /// NOTE: If the state obtained via getState() is INVALID, thus if |
2750 | /// AbstractAttribute::getState().isValidState() returns false, no |
2751 | /// information provided by the methods of this class should be used. |
2752 | /// NOTE: The Attributor currently has certain limitations to what we can do. |
2753 | /// As a general rule of thumb, "concrete" abstract attributes should *for |
2754 | /// now* only perform "backward" information propagation. That means |
2755 | /// optimistic information obtained through abstract attributes should |
2756 | /// only be used at positions that precede the origin of the information |
2757 | /// with regards to the program flow. More practically, information can |
2758 | /// *now* be propagated from instructions to their enclosing function, but |
2759 | /// *not* from call sites to the called function. The mechanisms to allow |
2760 | /// both directions will be added in the future. |
2761 | /// NOTE: The mechanics of adding a new "concrete" abstract attribute are |
2762 | /// described in the file comment. |
2763 | struct AbstractAttribute : public IRPosition, public AADepGraphNode { |
2764 | using StateType = AbstractState; |
2765 | |
2766 | AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {} |
2767 | |
2768 | /// Virtual destructor. |
2769 | virtual ~AbstractAttribute() {} |
2770 | |
2771 | /// This function is used to identify if an \p DGN is of type |
2772 | /// AbstractAttribute so that the dyn_cast and cast can use such information |
2773 | /// to cast an AADepGraphNode to an AbstractAttribute. |
2774 | /// |
2775 | /// We eagerly return true here because all AADepGraphNodes except for the |
2776 | /// Synthethis Node are of type AbstractAttribute |
2777 | static bool classof(const AADepGraphNode *DGN) { return true; } |
2778 | |
2779 | /// Initialize the state with the information in the Attributor \p A. |
2780 | /// |
2781 | /// This function is called by the Attributor once all abstract attributes |
2782 | /// have been identified. It can and shall be used for task like: |
2783 | /// - identify existing knowledge in the IR and use it for the "known state" |
2784 | /// - perform any work that is not going to change over time, e.g., determine |
2785 | /// a subset of the IR, or attributes in-flight, that have to be looked at |
2786 | /// in the `updateImpl` method. |
2787 | virtual void initialize(Attributor &A) {} |
2788 | |
2789 | /// Return the internal abstract state for inspection. |
2790 | virtual StateType &getState() = 0; |
2791 | virtual const StateType &getState() const = 0; |
2792 | |
2793 | /// Return an IR position, see struct IRPosition. |
2794 | const IRPosition &getIRPosition() const { return *this; }; |
2795 | IRPosition &getIRPosition() { return *this; }; |
2796 | |
2797 | /// Helper functions, for debug purposes only. |
2798 | ///{ |
2799 | void print(raw_ostream &OS) const override; |
2800 | virtual void printWithDeps(raw_ostream &OS) const; |
2801 | void dump() const { print(dbgs()); } |
2802 | |
2803 | /// This function should return the "summarized" assumed state as string. |
2804 | virtual const std::string getAsStr() const = 0; |
2805 | |
2806 | /// This function should return the name of the AbstractAttribute |
2807 | virtual const std::string getName() const = 0; |
2808 | |
2809 | /// This function should return the address of the ID of the AbstractAttribute |
2810 | virtual const char *getIdAddr() const = 0; |
2811 | ///} |
2812 | |
2813 | /// Allow the Attributor access to the protected methods. |
2814 | friend struct Attributor; |
2815 | |
2816 | protected: |
2817 | /// Hook for the Attributor to trigger an update of the internal state. |
2818 | /// |
2819 | /// If this attribute is already fixed, this method will return UNCHANGED, |
2820 | /// otherwise it delegates to `AbstractAttribute::updateImpl`. |
2821 | /// |
2822 | /// \Return CHANGED if the internal state changed, otherwise UNCHANGED. |
2823 | ChangeStatus update(Attributor &A); |
2824 | |
2825 | /// Hook for the Attributor to trigger the manifestation of the information |
2826 | /// represented by the abstract attribute in the LLVM-IR. |
2827 | /// |
2828 | /// \Return CHANGED if the IR was altered, otherwise UNCHANGED. |
2829 | virtual ChangeStatus manifest(Attributor &A) { |
2830 | return ChangeStatus::UNCHANGED; |
2831 | } |
2832 | |
2833 | /// Hook to enable custom statistic tracking, called after manifest that |
2834 | /// resulted in a change if statistics are enabled. |
2835 | /// |
2836 | /// We require subclasses to provide an implementation so we remember to |
2837 | /// add statistics for them. |
2838 | virtual void trackStatistics() const = 0; |
2839 | |
2840 | /// The actual update/transfer function which has to be implemented by the |
2841 | /// derived classes. |
2842 | /// |
2843 | /// If it is called, the environment has changed and we have to determine if |
2844 | /// the current information is still valid or adjust it otherwise. |
2845 | /// |
2846 | /// \Return CHANGED if the internal state changed, otherwise UNCHANGED. |
2847 | virtual ChangeStatus updateImpl(Attributor &A) = 0; |
2848 | }; |
2849 | |
2850 | /// Forward declarations of output streams for debug purposes. |
2851 | /// |
2852 | ///{ |
2853 | raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA); |
2854 | raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S); |
2855 | raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind); |
2856 | raw_ostream &operator<<(raw_ostream &OS, const IRPosition &); |
2857 | raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State); |
2858 | template <typename base_ty, base_ty BestState, base_ty WorstState> |
2859 | raw_ostream & |
2860 | operator<<(raw_ostream &OS, |
2861 | const IntegerStateBase<base_ty, BestState, WorstState> &S) { |
2862 | return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")" |
2863 | << static_cast<const AbstractState &>(S); |
2864 | } |
2865 | raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State); |
2866 | ///} |
2867 | |
2868 | struct AttributorPass : public PassInfoMixin<AttributorPass> { |
2869 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
2870 | }; |
2871 | struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> { |
2872 | PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, |
2873 | LazyCallGraph &CG, CGSCCUpdateResult &UR); |
2874 | }; |
2875 | |
2876 | Pass *createAttributorLegacyPass(); |
2877 | Pass *createAttributorCGSCCLegacyPass(); |
2878 | |
2879 | /// Helper function to clamp a state \p S of type \p StateType with the |
2880 | /// information in \p R and indicate/return if \p S did change (as-in update is |
2881 | /// required to be run again). |
2882 | template <typename StateType> |
2883 | ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) { |
2884 | auto Assumed = S.getAssumed(); |
2885 | S ^= R; |
2886 | return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED |
2887 | : ChangeStatus::CHANGED; |
2888 | } |
2889 | |
2890 | /// ---------------------------------------------------------------------------- |
2891 | /// Abstract Attribute Classes |
2892 | /// ---------------------------------------------------------------------------- |
2893 | |
2894 | /// An abstract attribute for the returned values of a function. |
2895 | struct AAReturnedValues |
2896 | : public IRAttribute<Attribute::Returned, AbstractAttribute> { |
2897 | AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
2898 | |
2899 | /// Return an assumed unique return value if a single candidate is found. If |
2900 | /// there cannot be one, return a nullptr. If it is not clear yet, return the |
2901 | /// Optional::NoneType. |
2902 | Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const; |
2903 | |
2904 | /// Check \p Pred on all returned values. |
2905 | /// |
2906 | /// This method will evaluate \p Pred on returned values and return |
2907 | /// true if (1) all returned values are known, and (2) \p Pred returned true |
2908 | /// for all returned values. |
2909 | /// |
2910 | /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts |
2911 | /// method, this one will not filter dead return instructions. |
2912 | virtual bool checkForAllReturnedValuesAndReturnInsts( |
2913 | function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred) |
2914 | const = 0; |
2915 | |
2916 | using iterator = |
2917 | MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator; |
2918 | using const_iterator = |
2919 | MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator; |
2920 | virtual llvm::iterator_range<iterator> returned_values() = 0; |
2921 | virtual llvm::iterator_range<const_iterator> returned_values() const = 0; |
2922 | |
2923 | virtual size_t getNumReturnValues() const = 0; |
2924 | |
2925 | /// Create an abstract attribute view for the position \p IRP. |
2926 | static AAReturnedValues &createForPosition(const IRPosition &IRP, |
2927 | Attributor &A); |
2928 | |
2929 | /// See AbstractAttribute::getName() |
2930 | const std::string getName() const override { return "AAReturnedValues"; } |
2931 | |
2932 | /// See AbstractAttribute::getIdAddr() |
2933 | const char *getIdAddr() const override { return &ID; } |
2934 | |
2935 | /// This function should return true if the type of the \p AA is |
2936 | /// AAReturnedValues |
2937 | static bool classof(const AbstractAttribute *AA) { |
2938 | return (AA->getIdAddr() == &ID); |
2939 | } |
2940 | |
2941 | /// Unique ID (due to the unique address) |
2942 | static const char ID; |
2943 | }; |
2944 | |
2945 | struct AANoUnwind |
2946 | : public IRAttribute<Attribute::NoUnwind, |
2947 | StateWrapper<BooleanState, AbstractAttribute>> { |
2948 | AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
2949 | |
2950 | /// Returns true if nounwind is assumed. |
2951 | bool isAssumedNoUnwind() const { return getAssumed(); } |
2952 | |
2953 | /// Returns true if nounwind is known. |
2954 | bool isKnownNoUnwind() const { return getKnown(); } |
2955 | |
2956 | /// Create an abstract attribute view for the position \p IRP. |
2957 | static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A); |
2958 | |
2959 | /// See AbstractAttribute::getName() |
2960 | const std::string getName() const override { return "AANoUnwind"; } |
2961 | |
2962 | /// See AbstractAttribute::getIdAddr() |
2963 | const char *getIdAddr() const override { return &ID; } |
2964 | |
2965 | /// This function should return true if the type of the \p AA is AANoUnwind |
2966 | static bool classof(const AbstractAttribute *AA) { |
2967 | return (AA->getIdAddr() == &ID); |
2968 | } |
2969 | |
2970 | /// Unique ID (due to the unique address) |
2971 | static const char ID; |
2972 | }; |
2973 | |
2974 | struct AANoSync |
2975 | : public IRAttribute<Attribute::NoSync, |
2976 | StateWrapper<BooleanState, AbstractAttribute>> { |
2977 | AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
2978 | |
2979 | /// Returns true if "nosync" is assumed. |
2980 | bool isAssumedNoSync() const { return getAssumed(); } |
2981 | |
2982 | /// Returns true if "nosync" is known. |
2983 | bool isKnownNoSync() const { return getKnown(); } |
2984 | |
2985 | /// Create an abstract attribute view for the position \p IRP. |
2986 | static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A); |
2987 | |
2988 | /// See AbstractAttribute::getName() |
2989 | const std::string getName() const override { return "AANoSync"; } |
2990 | |
2991 | /// See AbstractAttribute::getIdAddr() |
2992 | const char *getIdAddr() const override { return &ID; } |
2993 | |
2994 | /// This function should return true if the type of the \p AA is AANoSync |
2995 | static bool classof(const AbstractAttribute *AA) { |
2996 | return (AA->getIdAddr() == &ID); |
2997 | } |
2998 | |
2999 | /// Unique ID (due to the unique address) |
3000 | static const char ID; |
3001 | }; |
3002 | |
3003 | /// An abstract interface for all nonnull attributes. |
3004 | struct AANonNull |
3005 | : public IRAttribute<Attribute::NonNull, |
3006 | StateWrapper<BooleanState, AbstractAttribute>> { |
3007 | AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3008 | |
3009 | /// Return true if we assume that the underlying value is nonnull. |
3010 | bool isAssumedNonNull() const { return getAssumed(); } |
3011 | |
3012 | /// Return true if we know that underlying value is nonnull. |
3013 | bool isKnownNonNull() const { return getKnown(); } |
3014 | |
3015 | /// Create an abstract attribute view for the position \p IRP. |
3016 | static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A); |
3017 | |
3018 | /// See AbstractAttribute::getName() |
3019 | const std::string getName() const override { return "AANonNull"; } |
3020 | |
3021 | /// See AbstractAttribute::getIdAddr() |
3022 | const char *getIdAddr() const override { return &ID; } |
3023 | |
3024 | /// This function should return true if the type of the \p AA is AANonNull |
3025 | static bool classof(const AbstractAttribute *AA) { |
3026 | return (AA->getIdAddr() == &ID); |
3027 | } |
3028 | |
3029 | /// Unique ID (due to the unique address) |
3030 | static const char ID; |
3031 | }; |
3032 | |
3033 | /// An abstract attribute for norecurse. |
3034 | struct AANoRecurse |
3035 | : public IRAttribute<Attribute::NoRecurse, |
3036 | StateWrapper<BooleanState, AbstractAttribute>> { |
3037 | AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3038 | |
3039 | /// Return true if "norecurse" is assumed. |
3040 | bool isAssumedNoRecurse() const { return getAssumed(); } |
3041 | |
3042 | /// Return true if "norecurse" is known. |
3043 | bool isKnownNoRecurse() const { return getKnown(); } |
3044 | |
3045 | /// Create an abstract attribute view for the position \p IRP. |
3046 | static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A); |
3047 | |
3048 | /// See AbstractAttribute::getName() |
3049 | const std::string getName() const override { return "AANoRecurse"; } |
3050 | |
3051 | /// See AbstractAttribute::getIdAddr() |
3052 | const char *getIdAddr() const override { return &ID; } |
3053 | |
3054 | /// This function should return true if the type of the \p AA is AANoRecurse |
3055 | static bool classof(const AbstractAttribute *AA) { |
3056 | return (AA->getIdAddr() == &ID); |
3057 | } |
3058 | |
3059 | /// Unique ID (due to the unique address) |
3060 | static const char ID; |
3061 | }; |
3062 | |
3063 | /// An abstract attribute for willreturn. |
3064 | struct AAWillReturn |
3065 | : public IRAttribute<Attribute::WillReturn, |
3066 | StateWrapper<BooleanState, AbstractAttribute>> { |
3067 | AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3068 | |
3069 | /// Return true if "willreturn" is assumed. |
3070 | bool isAssumedWillReturn() const { return getAssumed(); } |
3071 | |
3072 | /// Return true if "willreturn" is known. |
3073 | bool isKnownWillReturn() const { return getKnown(); } |
3074 | |
3075 | /// Create an abstract attribute view for the position \p IRP. |
3076 | static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A); |
3077 | |
3078 | /// See AbstractAttribute::getName() |
3079 | const std::string getName() const override { return "AAWillReturn"; } |
3080 | |
3081 | /// See AbstractAttribute::getIdAddr() |
3082 | const char *getIdAddr() const override { return &ID; } |
3083 | |
3084 | /// This function should return true if the type of the \p AA is AAWillReturn |
3085 | static bool classof(const AbstractAttribute *AA) { |
3086 | return (AA->getIdAddr() == &ID); |
3087 | } |
3088 | |
3089 | /// Unique ID (due to the unique address) |
3090 | static const char ID; |
3091 | }; |
3092 | |
3093 | /// An abstract attribute for undefined behavior. |
3094 | struct AAUndefinedBehavior |
3095 | : public StateWrapper<BooleanState, AbstractAttribute> { |
3096 | using Base = StateWrapper<BooleanState, AbstractAttribute>; |
3097 | AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
3098 | |
3099 | /// Return true if "undefined behavior" is assumed. |
3100 | bool isAssumedToCauseUB() const { return getAssumed(); } |
3101 | |
3102 | /// Return true if "undefined behavior" is assumed for a specific instruction. |
3103 | virtual bool isAssumedToCauseUB(Instruction *I) const = 0; |
3104 | |
3105 | /// Return true if "undefined behavior" is known. |
3106 | bool isKnownToCauseUB() const { return getKnown(); } |
3107 | |
3108 | /// Return true if "undefined behavior" is known for a specific instruction. |
3109 | virtual bool isKnownToCauseUB(Instruction *I) const = 0; |
3110 | |
3111 | /// Create an abstract attribute view for the position \p IRP. |
3112 | static AAUndefinedBehavior &createForPosition(const IRPosition &IRP, |
3113 | Attributor &A); |
3114 | |
3115 | /// See AbstractAttribute::getName() |
3116 | const std::string getName() const override { return "AAUndefinedBehavior"; } |
3117 | |
3118 | /// See AbstractAttribute::getIdAddr() |
3119 | const char *getIdAddr() const override { return &ID; } |
3120 | |
3121 | /// This function should return true if the type of the \p AA is |
3122 | /// AAUndefineBehavior |
3123 | static bool classof(const AbstractAttribute *AA) { |
3124 | return (AA->getIdAddr() == &ID); |
3125 | } |
3126 | |
3127 | /// Unique ID (due to the unique address) |
3128 | static const char ID; |
3129 | }; |
3130 | |
3131 | /// An abstract interface to determine reachability of point A to B. |
3132 | struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> { |
3133 | using Base = StateWrapper<BooleanState, AbstractAttribute>; |
3134 | AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
3135 | |
3136 | /// Returns true if 'From' instruction is assumed to reach, 'To' instruction. |
3137 | /// Users should provide two positions they are interested in, and the class |
3138 | /// determines (and caches) reachability. |
3139 | bool isAssumedReachable(Attributor &A, const Instruction &From, |
3140 | const Instruction &To) const { |
3141 | if (!getState().isValidState()) |
3142 | return true; |
3143 | return A.getInfoCache().getPotentiallyReachable(From, To); |
3144 | } |
3145 | |
3146 | /// Returns true if 'From' instruction is known to reach, 'To' instruction. |
3147 | /// Users should provide two positions they are interested in, and the class |
3148 | /// determines (and caches) reachability. |
3149 | bool isKnownReachable(Attributor &A, const Instruction &From, |
3150 | const Instruction &To) const { |
3151 | if (!getState().isValidState()) |
3152 | return false; |
3153 | return A.getInfoCache().getPotentiallyReachable(From, To); |
3154 | } |
3155 | |
3156 | /// Create an abstract attribute view for the position \p IRP. |
3157 | static AAReachability &createForPosition(const IRPosition &IRP, |
3158 | Attributor &A); |
3159 | |
3160 | /// See AbstractAttribute::getName() |
3161 | const std::string getName() const override { return "AAReachability"; } |
3162 | |
3163 | /// See AbstractAttribute::getIdAddr() |
3164 | const char *getIdAddr() const override { return &ID; } |
3165 | |
3166 | /// This function should return true if the type of the \p AA is |
3167 | /// AAReachability |
3168 | static bool classof(const AbstractAttribute *AA) { |
3169 | return (AA->getIdAddr() == &ID); |
3170 | } |
3171 | |
3172 | /// Unique ID (due to the unique address) |
3173 | static const char ID; |
3174 | }; |
3175 | |
3176 | /// An abstract interface for all noalias attributes. |
3177 | struct AANoAlias |
3178 | : public IRAttribute<Attribute::NoAlias, |
3179 | StateWrapper<BooleanState, AbstractAttribute>> { |
3180 | AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3181 | |
3182 | /// Return true if we assume that the underlying value is alias. |
3183 | bool isAssumedNoAlias() const { return getAssumed(); } |
3184 | |
3185 | /// Return true if we know that underlying value is noalias. |
3186 | bool isKnownNoAlias() const { return getKnown(); } |
3187 | |
3188 | /// Create an abstract attribute view for the position \p IRP. |
3189 | static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A); |
3190 | |
3191 | /// See AbstractAttribute::getName() |
3192 | const std::string getName() const override { return "AANoAlias"; } |
3193 | |
3194 | /// See AbstractAttribute::getIdAddr() |
3195 | const char *getIdAddr() const override { return &ID; } |
3196 | |
3197 | /// This function should return true if the type of the \p AA is AANoAlias |
3198 | static bool classof(const AbstractAttribute *AA) { |
3199 | return (AA->getIdAddr() == &ID); |
3200 | } |
3201 | |
3202 | /// Unique ID (due to the unique address) |
3203 | static const char ID; |
3204 | }; |
3205 | |
3206 | /// An AbstractAttribute for nofree. |
3207 | struct AANoFree |
3208 | : public IRAttribute<Attribute::NoFree, |
3209 | StateWrapper<BooleanState, AbstractAttribute>> { |
3210 | AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3211 | |
3212 | /// Return true if "nofree" is assumed. |
3213 | bool isAssumedNoFree() const { return getAssumed(); } |
3214 | |
3215 | /// Return true if "nofree" is known. |
3216 | bool isKnownNoFree() const { return getKnown(); } |
3217 | |
3218 | /// Create an abstract attribute view for the position \p IRP. |
3219 | static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A); |
3220 | |
3221 | /// See AbstractAttribute::getName() |
3222 | const std::string getName() const override { return "AANoFree"; } |
3223 | |
3224 | /// See AbstractAttribute::getIdAddr() |
3225 | const char *getIdAddr() const override { return &ID; } |
3226 | |
3227 | /// This function should return true if the type of the \p AA is AANoFree |
3228 | static bool classof(const AbstractAttribute *AA) { |
3229 | return (AA->getIdAddr() == &ID); |
3230 | } |
3231 | |
3232 | /// Unique ID (due to the unique address) |
3233 | static const char ID; |
3234 | }; |
3235 | |
3236 | /// An AbstractAttribute for noreturn. |
3237 | struct AANoReturn |
3238 | : public IRAttribute<Attribute::NoReturn, |
3239 | StateWrapper<BooleanState, AbstractAttribute>> { |
3240 | AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3241 | |
3242 | /// Return true if the underlying object is assumed to never return. |
3243 | bool isAssumedNoReturn() const { return getAssumed(); } |
3244 | |
3245 | /// Return true if the underlying object is known to never return. |
3246 | bool isKnownNoReturn() const { return getKnown(); } |
3247 | |
3248 | /// Create an abstract attribute view for the position \p IRP. |
3249 | static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A); |
3250 | |
3251 | /// See AbstractAttribute::getName() |
3252 | const std::string getName() const override { return "AANoReturn"; } |
3253 | |
3254 | /// See AbstractAttribute::getIdAddr() |
3255 | const char *getIdAddr() const override { return &ID; } |
3256 | |
3257 | /// This function should return true if the type of the \p AA is AANoReturn |
3258 | static bool classof(const AbstractAttribute *AA) { |
3259 | return (AA->getIdAddr() == &ID); |
3260 | } |
3261 | |
3262 | /// Unique ID (due to the unique address) |
3263 | static const char ID; |
3264 | }; |
3265 | |
3266 | /// An abstract interface for liveness abstract attribute. |
3267 | struct AAIsDead |
3268 | : public StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute> { |
3269 | using Base = StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute>; |
3270 | AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
3271 | |
3272 | /// State encoding bits. A set bit in the state means the property holds. |
3273 | enum { |
3274 | HAS_NO_EFFECT = 1 << 0, |
3275 | IS_REMOVABLE = 1 << 1, |
3276 | |
3277 | IS_DEAD = HAS_NO_EFFECT | IS_REMOVABLE, |
3278 | }; |
3279 | static_assert(IS_DEAD == getBestState(), "Unexpected BEST_STATE value"); |
3280 | |
3281 | protected: |
3282 | /// The query functions are protected such that other attributes need to go |
3283 | /// through the Attributor interfaces: `Attributor::isAssumedDead(...)` |
3284 | |
3285 | /// Returns true if the underlying value is assumed dead. |
3286 | virtual bool isAssumedDead() const = 0; |
3287 | |
3288 | /// Returns true if the underlying value is known dead. |
3289 | virtual bool isKnownDead() const = 0; |
3290 | |
3291 | /// Returns true if \p BB is assumed dead. |
3292 | virtual bool isAssumedDead(const BasicBlock *BB) const = 0; |
3293 | |
3294 | /// Returns true if \p BB is known dead. |
3295 | virtual bool isKnownDead(const BasicBlock *BB) const = 0; |
3296 | |
3297 | /// Returns true if \p I is assumed dead. |
3298 | virtual bool isAssumedDead(const Instruction *I) const = 0; |
3299 | |
3300 | /// Returns true if \p I is known dead. |
3301 | virtual bool isKnownDead(const Instruction *I) const = 0; |
3302 | |
3303 | /// This method is used to check if at least one instruction in a collection |
3304 | /// of instructions is live. |
3305 | template <typename T> bool isLiveInstSet(T begin, T end) const { |
3306 | for (const auto &I : llvm::make_range(begin, end)) { |
3307 | assert(I->getFunction() == getIRPosition().getAssociatedFunction() &&(static_cast <bool> (I->getFunction() == getIRPosition ().getAssociatedFunction() && "Instruction must be in the same anchor scope function." ) ? void (0) : __assert_fail ("I->getFunction() == getIRPosition().getAssociatedFunction() && \"Instruction must be in the same anchor scope function.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 3308, __extension__ __PRETTY_FUNCTION__)) |
3308 | "Instruction must be in the same anchor scope function.")(static_cast <bool> (I->getFunction() == getIRPosition ().getAssociatedFunction() && "Instruction must be in the same anchor scope function." ) ? void (0) : __assert_fail ("I->getFunction() == getIRPosition().getAssociatedFunction() && \"Instruction must be in the same anchor scope function.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 3308, __extension__ __PRETTY_FUNCTION__)); |
3309 | |
3310 | if (!isAssumedDead(I)) |
3311 | return true; |
3312 | } |
3313 | |
3314 | return false; |
3315 | } |
3316 | |
3317 | public: |
3318 | /// Create an abstract attribute view for the position \p IRP. |
3319 | static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A); |
3320 | |
3321 | /// Determine if \p F might catch asynchronous exceptions. |
3322 | static bool mayCatchAsynchronousExceptions(const Function &F) { |
3323 | return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); |
3324 | } |
3325 | |
3326 | /// Return if the edge from \p From BB to \p To BB is assumed dead. |
3327 | /// This is specifically useful in AAReachability. |
3328 | virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const { |
3329 | return false; |
3330 | } |
3331 | |
3332 | /// See AbstractAttribute::getName() |
3333 | const std::string getName() const override { return "AAIsDead"; } |
3334 | |
3335 | /// See AbstractAttribute::getIdAddr() |
3336 | const char *getIdAddr() const override { return &ID; } |
3337 | |
3338 | /// This function should return true if the type of the \p AA is AAIsDead |
3339 | static bool classof(const AbstractAttribute *AA) { |
3340 | return (AA->getIdAddr() == &ID); |
3341 | } |
3342 | |
3343 | /// Unique ID (due to the unique address) |
3344 | static const char ID; |
3345 | |
3346 | friend struct Attributor; |
3347 | }; |
3348 | |
3349 | /// State for the dereferenceable attribute; combines DerefBytesState, AccessedBytesMap, and GlobalState. |
3350 | struct DerefState : AbstractState { |
3351 | |
3352 | static DerefState getBestState() { return DerefState(); } |
3353 | static DerefState getBestState(const DerefState &) { return getBestState(); } |
3354 | |
3355 | /// Return the worst possible representable state, i.e., a default state driven to its pessimistic fixpoint. |
3356 | static DerefState getWorstState() { |
3357 | DerefState DS; |
3358 | DS.indicatePessimisticFixpoint(); |
3359 | return DS; |
3360 | } |
3361 | static DerefState getWorstState(const DerefState &) { |
3362 | return getWorstState(); |
3363 | } |
3364 | |
3365 | /// State representing the dereferenceable bytes. |
3366 | IncIntegerState<> DerefBytesState; |
3367 | |
3368 | /// Map representing the accessed memory offsets and sizes. |
3369 | /// The key is the offset and the value is the access size. |
3370 | /// If there is a load/store instruction like, |
3371 | /// p[offset] = v; |
3372 | /// (offset, sizeof(v)) is inserted into this map. |
3373 | /// std::map is used because we want to iterate keys in ascending order. |
3374 | std::map<int64_t, uint64_t> AccessedBytesMap; |
3375 | |
3376 | /// Helper function to calculate dereferenceable bytes from current known |
3377 | /// bytes and accessed bytes. |
3378 | /// ``` |
3379 | /// int f(int *A){ |
3380 | /// *A = 0; |
3381 | /// *(A+2) = 2; |
3382 | /// *(A+1) = 1; |
3383 | /// *(A+10) = 10; |
3384 | /// } |
3385 | /// ``` |
3386 | /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`. |
3387 | /// AccessedBytesMap is a std::map so it is iterated in ascending order on |
3388 | /// key (Offset). So KnownBytes will be updated like this: |
3389 | /// |
3390 | /// |Access | KnownBytes |
3391 | /// |(0, 4)| 0 -> 4 |
3392 | /// |(4, 4)| 4 -> 8 |
3393 | /// |(8, 4)| 8 -> 12 |
3394 | /// |(40, 4) | 12 (break) |
3395 | void computeKnownDerefBytesFromAccessedMap() { |
3396 | int64_t KnownBytes = DerefBytesState.getKnown(); |
3397 | for (auto &Access : AccessedBytesMap) { |
3398 | if (KnownBytes < Access.first) |
3399 | break; |
3400 | KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second); |
3401 | } |
3402 | |
3403 | DerefBytesState.takeKnownMaximum(KnownBytes); |
3404 | } |
3405 | |
3406 | /// State representing whether the value is globally dereferenceable. |
3407 | BooleanState GlobalState; |
3408 | |
3409 | /// See AbstractState::isValidState(). Validity is determined by DerefBytesState alone. |
3410 | bool isValidState() const override { return DerefBytesState.isValidState(); } |
3411 | |
3412 | /// See AbstractState::isAtFixpoint(). True if the state is invalid or both sub-states are at a fixpoint. |
3413 | bool isAtFixpoint() const override { |
3414 | return !isValidState() || |
3415 | (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint()); |
3416 | } |
3417 | |
3418 | /// See AbstractState::indicateOptimisticFixpoint(...). Applies to both sub-states; always returns UNCHANGED. |
3419 | ChangeStatus indicateOptimisticFixpoint() override { |
3420 | DerefBytesState.indicateOptimisticFixpoint(); |
3421 | GlobalState.indicateOptimisticFixpoint(); |
3422 | return ChangeStatus::UNCHANGED; |
3423 | } |
3424 | |
3425 | /// See AbstractState::indicatePessimisticFixpoint(...). Applies to both sub-states; always returns CHANGED. |
3426 | ChangeStatus indicatePessimisticFixpoint() override { |
3427 | DerefBytesState.indicatePessimisticFixpoint(); |
3428 | GlobalState.indicatePessimisticFixpoint(); |
3429 | return ChangeStatus::CHANGED; |
3430 | } |
3431 | |
3432 | /// Update the known dereferenceable bytes with \p Bytes (via takeKnownMaximum). |
3433 | void takeKnownDerefBytesMaximum(uint64_t Bytes) { |
3434 | DerefBytesState.takeKnownMaximum(Bytes); |
3435 | |
3436 | // Known bytes might increase, so re-derive them from the accessed-bytes map. |
3437 | computeKnownDerefBytesFromAccessedMap(); |
3438 | } |
3439 | |
3440 | /// Update the assumed dereferenceable bytes with \p Bytes (via takeAssumedMinimum). |
3441 | void takeAssumedDerefBytesMinimum(uint64_t Bytes) { |
3442 | DerefBytesState.takeAssumedMinimum(Bytes); |
3443 | } |
3444 | |
3445 | /// Record an access of \p Size bytes at \p Offset, keeping the largest size seen for that offset. |
3446 | void addAccessedBytes(int64_t Offset, uint64_t Size) { |
3447 | uint64_t &AccessedBytes = AccessedBytesMap[Offset]; |
3448 | AccessedBytes = std::max(AccessedBytes, Size); |
3449 | |
3450 | // Known bytes might increase, so re-derive them from the accessed-bytes map. |
3451 | computeKnownDerefBytesFromAccessedMap(); |
3452 | } |
3453 | |
3454 | /// Equality for DerefState. Compares DerefBytesState and GlobalState only; AccessedBytesMap is not compared. |
3455 | bool operator==(const DerefState &R) const { |
3456 | return this->DerefBytesState == R.DerefBytesState && |
3457 | this->GlobalState == R.GlobalState; |
3458 | } |
3459 | |
3460 | /// Inequality for DerefState; the negation of operator==. |
3461 | bool operator!=(const DerefState &R) const { return !(*this == R); } |
3462 | |
3463 | /// See IntegerStateBase::operator^=. Applied to DerefBytesState and GlobalState. |
3464 | DerefState operator^=(const DerefState &R) { |
3465 | DerefBytesState ^= R.DerefBytesState; |
3466 | GlobalState ^= R.GlobalState; |
3467 | return *this; |
3468 | } |
3469 | |
3470 | /// See IntegerStateBase::operator+=. Applied to DerefBytesState and GlobalState. |
3471 | DerefState operator+=(const DerefState &R) { |
3472 | DerefBytesState += R.DerefBytesState; |
3473 | GlobalState += R.GlobalState; |
3474 | return *this; |
3475 | } |
3476 | |
3477 | /// See IntegerStateBase::operator&=. Applied to DerefBytesState and GlobalState. |
3478 | DerefState operator&=(const DerefState &R) { |
3479 | DerefBytesState &= R.DerefBytesState; |
3480 | GlobalState &= R.GlobalState; |
3481 | return *this; |
3482 | } |
3483 | |
3484 | /// See IntegerStateBase::operator|=. Applied to DerefBytesState and GlobalState. |
3485 | DerefState operator|=(const DerefState &R) { |
3486 | DerefBytesState |= R.DerefBytesState; |
3487 | GlobalState |= R.GlobalState; |
3488 | return *this; |
3489 | } |
3490 | |
3491 | protected: |
3492 | const AANonNull *NonNullAA = nullptr; |
3493 | }; |
3494 | |
3495 | /// An abstract interface for all dereferenceable attributes. |
3496 | struct AADereferenceable |
3497 | : public IRAttribute<Attribute::Dereferenceable, |
3498 | StateWrapper<DerefState, AbstractAttribute>> { |
3499 | AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3500 | |
3501 | /// Return true if we assume that the underlying value is nonnull; false when NonNullAA is unset. |
3502 | bool isAssumedNonNull() const { |
3503 | return NonNullAA && NonNullAA->isAssumedNonNull(); |
3504 | } |
3505 | |
3506 | /// Return true if we know that the underlying value is nonnull; false when NonNullAA is unset. |
3507 | bool isKnownNonNull() const { |
3508 | return NonNullAA && NonNullAA->isKnownNonNull(); |
3509 | } |
3510 | |
3511 | /// Return true if we assume that the underlying value is |
3512 | /// dereferenceable(_or_null) globally (the assumed value of GlobalState). |
3513 | bool isAssumedGlobal() const { return GlobalState.getAssumed(); } |
3514 | |
3515 | /// Return true if we know that the underlying value is |
3516 | /// dereferenceable(_or_null) globally (the known value of GlobalState). |
3517 | bool isKnownGlobal() const { return GlobalState.getKnown(); } |
3518 | |
3519 | /// Return the assumed dereferenceable bytes (from DerefBytesState). |
3520 | uint32_t getAssumedDereferenceableBytes() const { |
3521 | return DerefBytesState.getAssumed(); |
3522 | } |
3523 | |
3524 | /// Return the known dereferenceable bytes (from DerefBytesState). |
3525 | uint32_t getKnownDereferenceableBytes() const { |
3526 | return DerefBytesState.getKnown(); |
3527 | } |
3528 | |
3529 | /// Create an abstract attribute view for the position \p IRP. |
3530 | static AADereferenceable &createForPosition(const IRPosition &IRP, |
3531 | Attributor &A); |
3532 | |
3533 | /// See AbstractAttribute::getName() |
3534 | const std::string getName() const override { return "AADereferenceable"; } |
3535 | |
3536 | /// See AbstractAttribute::getIdAddr() |
3537 | const char *getIdAddr() const override { return &ID; } |
3538 | |
3539 | /// This function should return true if the type of the \p AA is |
3540 | /// AADereferenceable, which is decided by comparing ID addresses. |
3541 | static bool classof(const AbstractAttribute *AA) { |
3542 | return (AA->getIdAddr() == &ID); |
3543 | } |
3544 | |
3545 | /// Unique ID (due to the unique address) |
3546 | static const char ID; |
3547 | }; |
3548 | |
3549 | using AAAlignmentStateType = |
3550 | IncIntegerState<uint64_t, Value::MaximumAlignment, 1>; |
3551 | /// An abstract interface for all align attributes; its state is an AAAlignmentStateType. |
3552 | struct AAAlign : public IRAttribute< |
3553 | Attribute::Alignment, |
3554 | StateWrapper<AAAlignmentStateType, AbstractAttribute>> { |
3555 | AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3556 | |
3557 | /// Return the assumed alignment. |
3558 | uint64_t getAssumedAlign() const { return getAssumed(); } |
3559 | |
3560 | /// Return the known alignment. |
3561 | uint64_t getKnownAlign() const { return getKnown(); } |
3562 | |
3563 | /// See AbstractAttribute::getName() |
3564 | const std::string getName() const override { return "AAAlign"; } |
3565 | |
3566 | /// See AbstractAttribute::getIdAddr() |
3567 | const char *getIdAddr() const override { return &ID; } |
3568 | |
3569 | /// This function should return true if the type of the \p AA is AAAlign, which is decided by comparing ID addresses. |
3570 | static bool classof(const AbstractAttribute *AA) { |
3571 | return (AA->getIdAddr() == &ID); |
3572 | } |
3573 | |
3574 | /// Create an abstract attribute view for the position \p IRP. |
3575 | static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A); |
3576 | |
3577 | /// Unique ID (due to the unique address) |
3578 | static const char ID; |
3579 | }; |
3580 | |
3581 | /// An abstract interface for all nocapture attributes. |
3582 | struct AANoCapture |
3583 | : public IRAttribute< |
3584 | Attribute::NoCapture, |
3585 | StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> { |
3586 | AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3587 | |
3588 | /// State encoding bits. A set bit in the state means the property holds. |
3589 | /// NO_CAPTURE (all three bits set) is the best possible state, 0 the worst possible state. |
3590 | enum { |
3591 | NOT_CAPTURED_IN_MEM = 1 << 0, |
3592 | NOT_CAPTURED_IN_INT = 1 << 1, |
3593 | NOT_CAPTURED_IN_RET = 1 << 2, |
3594 | |
3595 | /// If we do not capture the value in memory or through integers we can only |
3596 | /// communicate it back as a derived pointer. |
3597 | NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT, |
3598 | |
3599 | /// If we do not capture the value in memory, through integers, or as a |
3600 | /// derived pointer we know it is not captured. |
3601 | NO_CAPTURE = |
3602 | NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET, |
3603 | }; |
3604 | |
3605 | /// Return true if we know that the underlying value is not captured in its |
3606 | /// respective scope, i.e., all NO_CAPTURE bits are known. |
3607 | bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); } |
3608 | |
3609 | /// Return true if we assume that the underlying value is not captured in its |
3610 | /// respective scope, i.e., all NO_CAPTURE bits are assumed. |
3611 | bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); } |
3612 | |
3613 | /// Return true if we know that the underlying value is not captured in its |
3614 | /// respective scope but we allow it to escape through a "return" (NO_CAPTURE_MAYBE_RETURNED). |
3615 | bool isKnownNoCaptureMaybeReturned() const { |
3616 | return isKnown(NO_CAPTURE_MAYBE_RETURNED); |
3617 | } |
3618 | |
3619 | /// Return true if we assume that the underlying value is not captured in its |
3620 | /// respective scope but we allow it to escape through a "return" (NO_CAPTURE_MAYBE_RETURNED). |
3621 | bool isAssumedNoCaptureMaybeReturned() const { |
3622 | return isAssumed(NO_CAPTURE_MAYBE_RETURNED); |
3623 | } |
3624 | |
3625 | /// Create an abstract attribute view for the position \p IRP. |
3626 | static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A); |
3627 | |
3628 | /// See AbstractAttribute::getName() |
3629 | const std::string getName() const override { return "AANoCapture"; } |
3630 | |
3631 | /// See AbstractAttribute::getIdAddr() |
3632 | const char *getIdAddr() const override { return &ID; } |
3633 | |
3634 | /// This function should return true if the type of the \p AA is AANoCapture, which is decided by comparing ID addresses. |
3635 | static bool classof(const AbstractAttribute *AA) { |
3636 | return (AA->getIdAddr() == &ID); |
3637 | } |
3638 | |
3639 | /// Unique ID (due to the unique address) |
3640 | static const char ID; |
3641 | }; |
3642 | |
3643 | struct ValueSimplifyStateType : public AbstractState { |
3644 | |
3645 | ValueSimplifyStateType(Type *Ty) : Ty(Ty) {} |
3646 | |
3647 | static ValueSimplifyStateType getBestState(Type *Ty) { |
3648 | return ValueSimplifyStateType(Ty); |
3649 | } |
3650 | static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) { |
3651 | return getBestState(VS.Ty); |
3652 | } |
3653 | |
3654 | /// Return the worst possible representable state, i.e., a fresh state for \p Ty driven to its pessimistic fixpoint. |
3655 | static ValueSimplifyStateType getWorstState(Type *Ty) { |
3656 | ValueSimplifyStateType DS(Ty); |
3657 | DS.indicatePessimisticFixpoint(); |
3658 | return DS; |
3659 | } |
3660 | static ValueSimplifyStateType |
3661 | getWorstState(const ValueSimplifyStateType &VS) { |
3662 | return getWorstState(VS.Ty); |
3663 | } |
3664 | |
3665 | /// See AbstractState::isValidState(...). Delegates to BS. |
3666 | bool isValidState() const override { return BS.isValidState(); } |
3667 | |
3668 | /// See AbstractState::isAtFixpoint(...). Delegates to BS. |
3669 | bool isAtFixpoint() const override { return BS.isAtFixpoint(); } |
3670 | |
3671 | /// Return the assumed state encoding, which is this state itself (the non-const overload returns a copy). |
3672 | ValueSimplifyStateType getAssumed() { return *this; } |
3673 | const ValueSimplifyStateType &getAssumed() const { return *this; } |
3674 | |
3675 | /// See AbstractState::indicatePessimisticFixpoint(...). Delegates to BS. |
3676 | ChangeStatus indicatePessimisticFixpoint() override { |
3677 | return BS.indicatePessimisticFixpoint(); |
3678 | } |
3679 | |
3680 | /// See AbstractState::indicateOptimisticFixpoint(...). Delegates to BS. |
3681 | ChangeStatus indicateOptimisticFixpoint() override { |
3682 | return BS.indicateOptimisticFixpoint(); |
3683 | } |
3684 | |
3685 | /// "Clamp" this state with \p VS: combine BS and merge in the simplified value of \p VS. |
3686 | ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) { |
3687 | BS ^= VS.BS; |
3688 | unionAssumed(VS.SimplifiedAssociatedValue); |
3689 | return *this; |
3690 | } |
3691 | |
3692 | bool operator==(const ValueSimplifyStateType &RHS) const { |
3693 | if (isValidState() != RHS.isValidState()) |
3694 | return false; |
3695 | if (!isValidState() && !RHS.isValidState()) |
3696 | return true; |
3697 | return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue; |
3698 | } |
3699 | |
3700 | protected: |
3701 | /// The type of the original value. |
3702 | Type *Ty; |
3703 | |
3704 | /// Merge \p Other into the currently assumed simplified value. |
3705 | bool unionAssumed(Optional<Value *> Other); |
3706 | |
3707 | /// Helper to track validity and fixpoint. |
3708 | BooleanState BS; |
3709 | |
3710 | /// An assumed simplified value. Initially, it is set to Optional::None, which |
3711 | /// means that the value is not clear under the current assumption. If in the |
3712 | /// pessimistic state, getAssumedSimplifiedValue doesn't return this value but |
3713 | /// returns the original associated value. |
3714 | Optional<Value *> SimplifiedAssociatedValue; |
3715 | }; |
3716 | |
3717 | /// An abstract interface for the value-simplify abstract attribute. |
3718 | struct AAValueSimplify |
3719 | : public StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *> { |
3720 | using Base = StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *>; |
3721 | AAValueSimplify(const IRPosition &IRP, Attributor &A) |
3722 | : Base(IRP, IRP.getAssociatedType()) {} |
3723 | |
3724 | /// Create an abstract attribute view for the position \p IRP. |
3725 | static AAValueSimplify &createForPosition(const IRPosition &IRP, |
3726 | Attributor &A); |
3727 | |
3728 | /// See AbstractAttribute::getName() |
3729 | const std::string getName() const override { return "AAValueSimplify"; } |
3730 | |
3731 | /// See AbstractAttribute::getIdAddr() |
3732 | const char *getIdAddr() const override { return &ID; } |
3733 | |
3734 | /// This function should return true if the type of the \p AA is |
3735 | /// AAValueSimplify, which is decided by comparing ID addresses. |
3736 | static bool classof(const AbstractAttribute *AA) { |
3737 | return (AA->getIdAddr() == &ID); |
3738 | } |
3739 | |
3740 | /// Unique ID (due to the unique address) |
3741 | static const char ID; |
3742 | |
3743 | private: |
3744 | /// Return an assumed simplified value if a single candidate is found. If |
3745 | /// there cannot be one, return the original value. If it is not clear yet, return |
3746 | /// an empty Optional (None). |
3747 | /// |
3748 | /// Private on purpose: use `Attributor::getAssumedSimplified` for value simplification. |
3749 | virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0; |
3750 | |
3751 | friend struct Attributor; |
3752 | }; |
3753 | |
3754 | struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> { |
3755 | using Base = StateWrapper<BooleanState, AbstractAttribute>; |
3756 | AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
3757 | |
3758 | /// Returns true if HeapToStack conversion is assumed to be possible for \p CB. |
3759 | virtual bool isAssumedHeapToStack(const CallBase &CB) const = 0; |
3760 | |
3761 | /// Returns true if HeapToStack conversion is assumed and \p CB is a |
3762 | /// call site to a free operation to be removed. |
3763 | virtual bool isAssumedHeapToStackRemovedFree(CallBase &CB) const = 0; |
3764 | |
3765 | /// Create an abstract attribute view for the position \p IRP. |
3766 | static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A); |
3767 | |
3768 | /// See AbstractAttribute::getName() |
3769 | const std::string getName() const override { return "AAHeapToStack"; } |
3770 | |
3771 | /// See AbstractAttribute::getIdAddr() |
3772 | const char *getIdAddr() const override { return &ID; } |
3773 | |
3774 | /// This function should return true if the type of the \p AA is AAHeapToStack, which is decided by comparing ID addresses. |
3775 | static bool classof(const AbstractAttribute *AA) { |
3776 | return (AA->getIdAddr() == &ID); |
3777 | } |
3778 | |
3779 | /// Unique ID (due to the unique address) |
3780 | static const char ID; |
3781 | }; |
3782 | |
3783 | /// An abstract interface for privatizability. |
3784 | /// |
3785 | /// A pointer is privatizable if it can be replaced by a new, private one. |
3786 | /// Privatizing a pointer reduces the use count and the interaction between |
3787 | /// unrelated code parts. |
3788 | /// |
3789 | /// In order for a pointer to be privatizable its value cannot be observed |
3790 | /// (=nocapture), it is (for now) not written (=readonly & noalias), we know |
3791 | /// what values are necessary to make the private copy look like the original |
3792 | /// one, and the values we need can be loaded (=dereferenceable). |
3793 | struct AAPrivatizablePtr |
3794 | : public StateWrapper<BooleanState, AbstractAttribute> { |
3795 | using Base = StateWrapper<BooleanState, AbstractAttribute>; |
3796 | AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
3797 | |
3798 | /// Returns true if pointer privatization is assumed to be possible. |
3799 | bool isAssumedPrivatizablePtr() const { return getAssumed(); } |
3800 | |
3801 | /// Returns true if pointer privatization is known to be possible. |
3802 | bool isKnownPrivatizablePtr() const { return getKnown(); } |
3803 | |
3804 | /// Return the type we can choose for a private copy of the underlying |
3805 | /// value. None means it is not clear yet, nullptr means there is no such type. |
3806 | virtual Optional<Type *> getPrivatizableType() const = 0; |
3807 | |
3808 | /// Create an abstract attribute view for the position \p IRP. |
3809 | static AAPrivatizablePtr &createForPosition(const IRPosition &IRP, |
3810 | Attributor &A); |
3811 | |
3812 | /// See AbstractAttribute::getName() |
3813 | const std::string getName() const override { return "AAPrivatizablePtr"; } |
3814 | |
3815 | /// See AbstractAttribute::getIdAddr() |
3816 | const char *getIdAddr() const override { return &ID; } |
3817 | |
3818 | /// This function should return true if the type of the \p AA is |
3819 | /// AAPrivatizablePtr, which is decided by comparing ID addresses. |
3820 | static bool classof(const AbstractAttribute *AA) { |
3821 | return (AA->getIdAddr() == &ID); |
3822 | } |
3823 | |
3824 | /// Unique ID (due to the unique address) |
3825 | static const char ID; |
3826 | }; |
3827 | |
3828 | /// An abstract interface for memory access kind related attributes |
3829 | /// (readnone/readonly/writeonly). |
3830 | struct AAMemoryBehavior |
3831 | : public IRAttribute< |
3832 | Attribute::ReadNone, |
3833 | StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> { |
3834 | AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3835 | |
3836 | /// State encoding bits. A set bit in the state means the property holds. |
3837 | /// BEST_STATE (= NO_ACCESSES) is the best possible state, 0 the worst possible state. |
3838 | enum { |
3839 | NO_READS = 1 << 0, |
3840 | NO_WRITES = 1 << 1, |
3841 | NO_ACCESSES = NO_READS | NO_WRITES, |
3842 | |
3843 | BEST_STATE = NO_ACCESSES, |
3844 | }; |
3845 | static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value"); |
3846 | |
3847 | /// Return true if we know that the underlying value is neither read nor written |
3848 | /// in its respective scope (NO_ACCESSES). |
3849 | bool isKnownReadNone() const { return isKnown(NO_ACCESSES); } |
3850 | |
3851 | /// Return true if we assume that the underlying value is neither read nor written |
3852 | /// in its respective scope (NO_ACCESSES). |
3853 | bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); } |
3854 | |
3855 | /// Return true if we know that the underlying value is not |
3856 | /// written in its respective scope (NO_WRITES). |
3857 | bool isKnownReadOnly() const { return isKnown(NO_WRITES); } |
3858 | |
3859 | /// Return true if we assume that the underlying value is not |
3860 | /// written in its respective scope (NO_WRITES). |
3861 | bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); } |
3862 | |
3863 | /// Return true if we know that the underlying value is not read in its |
3864 | /// respective scope (NO_READS). |
3865 | bool isKnownWriteOnly() const { return isKnown(NO_READS); } |
3866 | |
3867 | /// Return true if we assume that the underlying value is not read in its |
3868 | /// respective scope (NO_READS). |
3869 | bool isAssumedWriteOnly() const { return isAssumed(NO_READS); } |
3870 | |
3871 | /// Create an abstract attribute view for the position \p IRP. |
3872 | static AAMemoryBehavior &createForPosition(const IRPosition &IRP, |
3873 | Attributor &A); |
3874 | |
3875 | /// See AbstractAttribute::getName() |
3876 | const std::string getName() const override { return "AAMemoryBehavior"; } |
3877 | |
3878 | /// See AbstractAttribute::getIdAddr() |
3879 | const char *getIdAddr() const override { return &ID; } |
3880 | |
3881 | /// This function should return true if the type of the \p AA is |
3882 | /// AAMemoryBehavior, which is decided by comparing ID addresses. |
3883 | static bool classof(const AbstractAttribute *AA) { |
3884 | return (AA->getIdAddr() == &ID); |
3885 | } |
3886 | |
3887 | /// Unique ID (due to the unique address) |
3888 | static const char ID; |
3889 | }; |
3890 | |
3891 | /// An abstract interface for all memory location attributes |
3892 | /// (readnone/argmemonly/inaccessiblememonly/inaccessibleorargmemonly). |
3893 | struct AAMemoryLocation |
3894 | : public IRAttribute< |
3895 | Attribute::ReadNone, |
3896 | StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> { |
3897 | using MemoryLocationsKind = StateType::base_t; |
3898 | |
3899 | AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
3900 | |
3901 | /// Encoding of different locations that could be accessed by a memory |
3902 | /// access. A set NO_XXX bit excludes that location kind; ALL_LOCATIONS (0) excludes none. |
3903 | enum { |
3904 | ALL_LOCATIONS = 0, |
3905 | NO_LOCAL_MEM = 1 << 0, |
3906 | NO_CONST_MEM = 1 << 1, |
3907 | NO_GLOBAL_INTERNAL_MEM = 1 << 2, |
3908 | NO_GLOBAL_EXTERNAL_MEM = 1 << 3, |
3909 | NO_GLOBAL_MEM = NO_GLOBAL_INTERNAL_MEM | NO_GLOBAL_EXTERNAL_MEM, |
3910 | NO_ARGUMENT_MEM = 1 << 4, |
3911 | NO_INACCESSIBLE_MEM = 1 << 5, |
3912 | NO_MALLOCED_MEM = 1 << 6, |
3913 | NO_UNKOWN_MEM = 1 << 7, |
3914 | NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM | |
3915 | NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM | |
3916 | NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM, |
3917 | |
3918 | // Helper bit to track if we gave up or not. |
3919 | VALID_STATE = NO_LOCATIONS + 1, |
3920 | |
3921 | BEST_STATE = NO_LOCATIONS | VALID_STATE, |
3922 | }; |
3923 | static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value"); |
3924 | |
3925 | /// Return true if we know that the associated function has no observable |
3926 | /// accesses. |
3927 | bool isKnownReadNone() const { return isKnown(NO_LOCATIONS); } |
3928 | |
3929 | /// Return true if we assume that the associated function has no observable |
3930 | /// accesses; this also holds if only stack accesses are assumed. |
3931 | bool isAssumedReadNone() const { |
3932 | return isAssumed(NO_LOCATIONS) || isAssumedStackOnly(); |
3933 | } |
3934 | |
3935 | /// Return true if we know that the associated function has at most |
3936 | /// local/stack accesses. |
3937 | bool isKnowStackOnly() const { |
3938 | return isKnown(inverseLocation(NO_LOCAL_MEM, true, true)); |
3939 | } |
3940 | |
3941 | /// Return true if we assume that the associated function has at most |
3942 | /// local/stack accesses. |
3943 | bool isAssumedStackOnly() const { |
3944 | return isAssumed(inverseLocation(NO_LOCAL_MEM, true, true)); |
3945 | } |
3946 | |
3947 | /// Return true if we know that the underlying value will only access |
3948 | /// inaccessible memory (see Attribute::InaccessibleMemOnly). |
3949 | bool isKnownInaccessibleMemOnly() const { |
3950 | return isKnown(inverseLocation(NO_INACCESSIBLE_MEM, true, true)); |
3951 | } |
3952 | |
3953 | /// Return true if we assume that the underlying value will only access |
3954 | /// inaccessible memory (see Attribute::InaccessibleMemOnly). |
3955 | bool isAssumedInaccessibleMemOnly() const { |
3956 | return isAssumed(inverseLocation(NO_INACCESSIBLE_MEM, true, true)); |
3957 | } |
3958 | |
3959 | /// Return true if we know that the underlying value will only access |
3960 | /// argument pointees (see Attribute::ArgMemOnly). |
3961 | bool isKnownArgMemOnly() const { |
3962 | return isKnown(inverseLocation(NO_ARGUMENT_MEM, true, true)); |
3963 | } |
3964 | |
3965 | /// Return true if we assume that the underlying value will only access |
3966 | /// argument pointees (see Attribute::ArgMemOnly). |
3967 | bool isAssumedArgMemOnly() const { |
3968 | return isAssumed(inverseLocation(NO_ARGUMENT_MEM, true, true)); |
3969 | } |
3970 | |
3971 | /// Return true if we know that the underlying value will only access |
3972 | /// inaccessible memory or argument pointees (see |
3973 | /// Attribute::InaccessibleOrArgMemOnly). |
3974 | bool isKnownInaccessibleOrArgMemOnly() const { |
3975 | return isKnown( |
3976 | inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); |
3977 | } |
3978 | |
3979 | /// Return true if we assume that the underlying value will only access |
3980 | /// inaccessible memory or argument pointees (see |
3981 | /// Attribute::InaccessibleOrArgMemOnly). |
3982 | bool isAssumedInaccessibleOrArgMemOnly() const { |
3983 | return isAssumed( |
3984 | inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); |
3985 | } |
3986 | |
3987 | /// Return true if the underlying value may access memory through argument |
3988 | /// pointers of the associated function, if any. |
3989 | bool mayAccessArgMem() const { return !isAssumed(NO_ARGUMENT_MEM); } |
3990 | |
3991 | /// Return true if only the memory locations specified by \p MLK are assumed |
3992 | /// to be accessed by the associated function. |
3993 | bool isAssumedSpecifiedMemOnly(MemoryLocationsKind MLK) const { |
3994 | return isAssumed(MLK); |
3995 | } |
3996 | |
3997 | /// Return the locations that are assumed to be not accessed by the associated |
3998 | /// function, if any. |
3999 | MemoryLocationsKind getAssumedNotAccessedLocation() const { |
4000 | return getAssumed(); |
4001 | } |
4002 | |
4003 | /// Return the inverse of location \p Loc, thus for NO_XXX the return |
4004 | /// describes ONLY_XXX. The flags \p AndLocalMem and \p AndConstMem determine |
4005 | /// if local (=stack) and constant memory are allowed as well. Most of the |
4006 | /// time we do want them to be included, e.g., argmemonly allows accesses via |
4007 | /// argument pointers or local or constant memory accesses. |
4008 | static MemoryLocationsKind |
4009 | inverseLocation(MemoryLocationsKind Loc, bool AndLocalMem, bool AndConstMem) { |
4010 | return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) | |
4011 | (AndConstMem ? NO_CONST_MEM : 0)); |
4012 | }; |
4013 | |
4014 | /// Return the locations encoded by \p MLK as a readable string. |
4015 | static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK); |
4016 | |
4017 | /// Simple enum to distinguish read/write/read-write accesses. |
4018 | enum AccessKind { |
4019 | NONE = 0, |
4020 | READ = 1 << 0, |
4021 | WRITE = 1 << 1, |
4022 | READ_WRITE = READ | WRITE, |
4023 | }; |
4024 | |
4025 | /// Check \p Pred on all accesses to the memory kinds specified by \p MLK. |
4026 | /// |
4027 | /// This method will evaluate \p Pred on all accesses (access instruction + |
4028 | /// underlying accessed memory pointer) and it will return true if \p Pred |
4029 | /// holds every time. |
4030 | virtual bool checkForAllAccessesToMemoryKind( |
4031 | function_ref<bool(const Instruction *, const Value *, AccessKind, |
4032 | MemoryLocationsKind)> |
4033 | Pred, |
4034 | MemoryLocationsKind MLK) const = 0; |
4035 | |
4036 | /// Create an abstract attribute view for the position \p IRP. |
4037 | static AAMemoryLocation &createForPosition(const IRPosition &IRP, |
4038 | Attributor &A); |
4039 | |
4040 | /// See AbstractState::getAsStr(). Prints the assumed not-accessed locations. |
4041 | const std::string getAsStr() const override { |
4042 | return getMemoryLocationsAsStr(getAssumedNotAccessedLocation()); |
4043 | } |
4044 | |
4045 | /// See AbstractAttribute::getName() |
4046 | const std::string getName() const override { return "AAMemoryLocation"; } |
4047 | |
4048 | /// See AbstractAttribute::getIdAddr() |
4049 | const char *getIdAddr() const override { return &ID; } |
4050 | |
4051 | /// This function should return true if the type of the \p AA is |
4052 | /// AAMemoryLocation, which is decided by comparing ID addresses. |
4053 | static bool classof(const AbstractAttribute *AA) { |
4054 | return (AA->getIdAddr() == &ID); |
4055 | } |
4056 | |
4057 | /// Unique ID (due to the unique address) |
4058 | static const char ID; |
4059 | }; |
4060 | |
4061 | /// An abstract interface for range value analysis. |
4062 | struct AAValueConstantRange |
4063 | : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> { |
4064 | using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>; |
4065 | AAValueConstantRange(const IRPosition &IRP, Attributor &A) |
4066 | : Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {} |
4067 | |
4068 | /// See AbstractAttribute::getState(...). |
4069 | IntegerRangeState &getState() override { return *this; } |
4070 | const IntegerRangeState &getState() const override { return *this; } |
4071 | |
4072 | /// Create an abstract attribute view for the position \p IRP. |
4073 | static AAValueConstantRange &createForPosition(const IRPosition &IRP, |
4074 | Attributor &A); |
4075 | |
4076 | /// Return an assumed range for the associated value at a program point \p CtxI. |
4077 | /// If \p CtxI is nullptr, simply return an assumed range. |
4078 | virtual ConstantRange |
4079 | getAssumedConstantRange(Attributor &A, |
4080 | const Instruction *CtxI = nullptr) const = 0; |
4081 | |
4082 | /// Return a known range for the associated value at a program point \p CtxI. |
4083 | /// If \p CtxI is nullptr, simply return a known range. |
4084 | virtual ConstantRange |
4085 | getKnownConstantRange(Attributor &A, |
4086 | const Instruction *CtxI = nullptr) const = 0; |
4087 | |
4088 | /// Return an assumed constant for the associated value at a program point \p |
4089 | /// CtxI: the single element of the assumed range if there is one, None if the range is empty, nullptr otherwise. |
4090 | Optional<ConstantInt *> |
4091 | getAssumedConstantInt(Attributor &A, |
4092 | const Instruction *CtxI = nullptr) const { |
4093 | ConstantRange RangeV = getAssumedConstantRange(A, CtxI); |
4094 | if (auto *C = RangeV.getSingleElement()) |
4095 | return cast<ConstantInt>( |
4096 | ConstantInt::get(getAssociatedValue().getType(), *C)); |
4097 | if (RangeV.isEmptySet()) |
4098 | return llvm::None; |
4099 | return nullptr; |
4100 | } |
4101 | |
4102 | /// See AbstractAttribute::getName() |
4103 | const std::string getName() const override { return "AAValueConstantRange"; } |
4104 | |
4105 | /// See AbstractAttribute::getIdAddr() |
4106 | const char *getIdAddr() const override { return &ID; } |
4107 | |
4108 | /// This function should return true if the type of the \p AA is |
4109 | /// AAValueConstantRange, which is decided by comparing ID addresses. |
4110 | static bool classof(const AbstractAttribute *AA) { |
4111 | return (AA->getIdAddr() == &ID); |
4112 | } |
4113 | |
4114 | /// Unique ID (due to the unique address) |
4115 | static const char ID; |
4116 | }; |
4117 | |
4118 | /// A class for a set state. |
4119 | /// The assumed boolean state indicates whether the corresponding set is full |
4120 | /// set or not. If the assumed state is false, this is the worst state. The |
4121 | /// worst state (invalid state) of set of potential values is when the set |
4122 | /// contains every possible value (i.e. we cannot in any way limit the value |
4123 | /// that the target position can take). That never happens naturally, we only |
4124 | /// force it. As for the conditions under which we force it, see |
4125 | /// AAPotentialValues. |
4126 | template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>> |
4127 | struct PotentialValuesState : AbstractState { |
4128 | using SetTy = DenseSet<MemberTy, KeyInfo>; |
4129 | |
4130 | PotentialValuesState() : IsValidState(true), UndefIsContained(false) {} |
4131 | |
4132 | PotentialValuesState(bool IsValid) |
4133 | : IsValidState(IsValid), UndefIsContained(false) {} |
4134 | |
4135 | /// See AbstractState::isValidState(...) |
4136 | bool isValidState() const override { return IsValidState.isValidState(); } |
4137 | |
4138 | /// See AbstractState::isAtFixpoint(...) |
4139 | bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); } |
4140 | |
4141 | /// See AbstractState::indicatePessimisticFixpoint(...) |
4142 | ChangeStatus indicatePessimisticFixpoint() override { |
4143 | return IsValidState.indicatePessimisticFixpoint(); |
4144 | } |
4145 | |
4146 | /// See AbstractState::indicateOptimisticFixpoint(...) |
4147 | ChangeStatus indicateOptimisticFixpoint() override { |
4148 | return IsValidState.indicateOptimisticFixpoint(); |
4149 | } |
4150 | |
4151 | /// Return the assumed state |
4152 | PotentialValuesState &getAssumed() { return *this; } |
4153 | const PotentialValuesState &getAssumed() const { return *this; } |
4154 | |
4155 | /// Return this set. We should check whether this set is valid or not by |
4156 | /// isValidState() before calling this function. |
4157 | const SetTy &getAssumedSet() const { |
4158 | assert(isValidState() && "This set shoud not be used when it is invalid!")(static_cast <bool> (isValidState() && "This set shoud not be used when it is invalid!" ) ? void (0) : __assert_fail ("isValidState() && \"This set shoud not be used when it is invalid!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 4158, __extension__ __PRETTY_FUNCTION__)); |
4159 | return Set; |
4160 | } |
4161 | |
4162 | /// Returns whether this state contains an undef value or not. |
4163 | bool undefIsContained() const { |
4164 | assert(isValidState() && "This flag shoud not be used when it is invalid!")(static_cast <bool> (isValidState() && "This flag shoud not be used when it is invalid!" ) ? void (0) : __assert_fail ("isValidState() && \"This flag shoud not be used when it is invalid!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 4164, __extension__ __PRETTY_FUNCTION__)); |
4165 | return UndefIsContained; |
4166 | } |
4167 | |
4168 | bool operator==(const PotentialValuesState &RHS) const { |
4169 | if (isValidState() != RHS.isValidState()) |
4170 | return false; |
4171 | if (!isValidState() && !RHS.isValidState()) |
4172 | return true; |
4173 | if (undefIsContained() != RHS.undefIsContained()) |
4174 | return false; |
4175 | return Set == RHS.getAssumedSet(); |
4176 | } |
4177 | |
4178 | /// Maximum number of potential values to be tracked. |
4179 | /// This is set by -attributor-max-potential-values command line option |
4180 | static unsigned MaxPotentialValues; |
4181 | |
4182 | /// Return empty set as the best state of potential values. |
4183 | static PotentialValuesState getBestState() { |
4184 | return PotentialValuesState(true); |
4185 | } |
4186 | |
4187 | static PotentialValuesState getBestState(PotentialValuesState &PVS) { |
4188 | return getBestState(); |
4189 | } |
4190 | |
4191 | /// Return full set as the worst state of potential values. |
4192 | static PotentialValuesState getWorstState() { |
4193 | return PotentialValuesState(false); |
4194 | } |
4195 | |
4196 | /// Union assumed set with the passed value. |
4197 | void unionAssumed(const MemberTy &C) { insert(C); } |
4198 | |
4199 | /// Union assumed set with assumed set of the passed state \p PVS. |
4200 | void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); } |
4201 | |
4202 | /// Union assumed set with an undef value. |
4203 | void unionAssumedWithUndef() { unionWithUndef(); } |
4204 | |
4205 | /// "Clamp" this state with \p PVS. |
4206 | PotentialValuesState operator^=(const PotentialValuesState &PVS) { |
4207 | IsValidState ^= PVS.IsValidState; |
4208 | unionAssumed(PVS); |
4209 | return *this; |
4210 | } |
4211 | |
4212 | PotentialValuesState operator&=(const PotentialValuesState &PVS) { |
4213 | IsValidState &= PVS.IsValidState; |
4214 | unionAssumed(PVS); |
4215 | return *this; |
4216 | } |
4217 | |
4218 | private: |
4219 | /// Check the size of this set, and invalidate when the size is no |
4220 | /// less than \p MaxPotentialValues threshold. |
4221 | void checkAndInvalidate() { |
4222 | if (Set.size() >= MaxPotentialValues) |
4223 | indicatePessimisticFixpoint(); |
4224 | else |
4225 | reduceUndefValue(); |
4226 | } |
4227 | |
4228 | /// If this state contains both undef and not undef, we can reduce |
4229 | /// undef to the not undef value. |
4230 | void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); } |
4231 | |
4232 | /// Insert an element into this set. |
4233 | void insert(const MemberTy &C) { |
4234 | if (!isValidState()) |
4235 | return; |
4236 | Set.insert(C); |
4237 | checkAndInvalidate(); |
4238 | } |
4239 | |
4240 | /// Take union with R. |
4241 | void unionWith(const PotentialValuesState &R) { |
4242 | /// If this is a full set, do nothing. |
4243 | if (!isValidState()) |
4244 | return; |
4245 | /// If R is full set, change L to a full set. |
4246 | if (!R.isValidState()) { |
4247 | indicatePessimisticFixpoint(); |
4248 | return; |
4249 | } |
4250 | for (const MemberTy &C : R.Set) |
4251 | Set.insert(C); |
4252 | UndefIsContained |= R.undefIsContained(); |
4253 | checkAndInvalidate(); |
4254 | } |
4255 | |
4256 | /// Take union with an undef value. |
4257 | void unionWithUndef() { |
4258 | UndefIsContained = true; |
4259 | reduceUndefValue(); |
4260 | } |
4261 | |
4262 | /// Take intersection with R. |
4263 | void intersectWith(const PotentialValuesState &R) { |
4264 | /// If R is a full set, do nothing. |
4265 | if (!R.isValidState()) |
4266 | return; |
4267 | /// If this is a full set, change this to R. |
4268 | if (!isValidState()) { |
4269 | *this = R; |
4270 | return; |
4271 | } |
4272 | SetTy IntersectSet; |
4273 | for (const MemberTy &C : Set) { |
4274 | if (R.Set.count(C)) |
4275 | IntersectSet.insert(C); |
4276 | } |
4277 | Set = IntersectSet; |
4278 | UndefIsContained &= R.undefIsContained(); |
4279 | reduceUndefValue(); |
4280 | } |
4281 | |
4282 | /// A helper state which indicate whether this state is valid or not. |
4283 | BooleanState IsValidState; |
4284 | |
4285 | /// Container for potential values |
4286 | SetTy Set; |
4287 | |
4288 | /// Flag for undef value |
4289 | bool UndefIsContained; |
4290 | }; |
4291 | |
4292 | using PotentialConstantIntValuesState = PotentialValuesState<APInt>; |
4293 | |
4294 | raw_ostream &operator<<(raw_ostream &OS, |
4295 | const PotentialConstantIntValuesState &R); |
4296 | |
4297 | /// An abstract interface for potential values analysis. |
4298 | /// |
4299 | /// This AA collects potential values for each IR position. |
4300 | /// An assumed set of potential values is initialized with the empty set (the |
4301 | /// best state) and it will grow monotonically as we find more potential values |
4302 | /// for this position. |
4303 | /// The set might be forced to the worst state, that is, to contain every |
4304 | /// possible value for this position in 2 cases. |
4305 | /// 1. We surpassed the \p MaxPotentialValues threshold. This includes the |
4306 | /// case that this position is affected (e.g. because of an operation) by a |
4307 | /// Value that is in the worst state. |
4308 | /// 2. We tried to initialize on a Value that we cannot handle (e.g. an |
4309 | /// operator we do not currently handle). |
4310 | /// |
4311 | /// TODO: Support values other than constant integers. |
4312 | struct AAPotentialValues |
4313 | : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> { |
4314 | using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>; |
4315 | AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
4316 | |
4317 | /// See AbstractAttribute::getState(...). |
4318 | PotentialConstantIntValuesState &getState() override { return *this; } |
4319 | const PotentialConstantIntValuesState &getState() const override { |
4320 | return *this; |
4321 | } |
4322 | |
4323 | /// Create an abstract attribute view for the position \p IRP. |
4324 | static AAPotentialValues &createForPosition(const IRPosition &IRP, |
4325 | Attributor &A); |
4326 | |
4327 | /// Return assumed constant for the associated value |
4328 | Optional<ConstantInt *> |
4329 | getAssumedConstantInt(Attributor &A, |
4330 | const Instruction *CtxI = nullptr) const { |
4331 | if (!isValidState()) |
4332 | return nullptr; |
4333 | if (getAssumedSet().size() == 1) |
4334 | return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(), |
4335 | *(getAssumedSet().begin()))); |
4336 | if (getAssumedSet().size() == 0) { |
4337 | if (undefIsContained()) |
4338 | return cast<ConstantInt>( |
4339 | ConstantInt::get(getAssociatedValue().getType(), 0)); |
4340 | return llvm::None; |
4341 | } |
4342 | |
4343 | return nullptr; |
4344 | } |
4345 | |
4346 | /// See AbstractAttribute::getName() |
4347 | const std::string getName() const override { return "AAPotentialValues"; } |
4348 | |
4349 | /// See AbstractAttribute::getIdAddr() |
4350 | const char *getIdAddr() const override { return &ID; } |
4351 | |
4352 | /// This function should return true if the type of the \p AA is |
4353 | /// AAPotentialValues |
4354 | static bool classof(const AbstractAttribute *AA) { |
4355 | return (AA->getIdAddr() == &ID); |
4356 | } |
4357 | |
4358 | /// Unique ID (due to the unique address) |
4359 | static const char ID; |
4360 | }; |
4361 | |
4362 | /// An abstract interface for all noundef attributes. |
4363 | struct AANoUndef |
4364 | : public IRAttribute<Attribute::NoUndef, |
4365 | StateWrapper<BooleanState, AbstractAttribute>> { |
4366 | AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} |
4367 | |
4368 | /// Return true if we assume that the underlying value is noundef. |
4369 | bool isAssumedNoUndef() const { return getAssumed(); } |
4370 | |
4371 | /// Return true if we know that underlying value is noundef. |
4372 | bool isKnownNoUndef() const { return getKnown(); } |
4373 | |
4374 | /// Create an abstract attribute view for the position \p IRP. |
4375 | static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A); |
4376 | |
4377 | /// See AbstractAttribute::getName() |
4378 | const std::string getName() const override { return "AANoUndef"; } |
4379 | |
4380 | /// See AbstractAttribute::getIdAddr() |
4381 | const char *getIdAddr() const override { return &ID; } |
4382 | |
4383 | /// This function should return true if the type of the \p AA is AANoUndef |
4384 | static bool classof(const AbstractAttribute *AA) { |
4385 | return (AA->getIdAddr() == &ID); |
4386 | } |
4387 | |
4388 | /// Unique ID (due to the unique address) |
4389 | static const char ID; |
4390 | }; |
4391 | |
4392 | struct AACallGraphNode; |
4393 | struct AACallEdges; |
4394 | |
4395 | /// An Iterator for call edges, creates AACallEdges attributes in a lazy way. |
4396 | /// This iterator becomes invalid if the underlying edge list changes. |
4397 | /// So This shouldn't outlive a iteration of Attributor. |
4398 | class AACallEdgeIterator |
4399 | : public iterator_adaptor_base<AACallEdgeIterator, |
4400 | SetVector<Function *>::iterator> { |
4401 | AACallEdgeIterator(Attributor &A, SetVector<Function *>::iterator Begin) |
4402 | : iterator_adaptor_base(Begin), A(A) {} |
4403 | |
4404 | public: |
4405 | AACallGraphNode *operator*() const; |
4406 | |
4407 | private: |
4408 | Attributor &A; |
4409 | friend AACallEdges; |
4410 | friend AttributorCallGraph; |
4411 | }; |
4412 | |
4413 | struct AACallGraphNode { |
4414 | AACallGraphNode(Attributor &A) : A(A) {} |
4415 | virtual ~AACallGraphNode() {} |
4416 | |
4417 | virtual AACallEdgeIterator optimisticEdgesBegin() const = 0; |
4418 | virtual AACallEdgeIterator optimisticEdgesEnd() const = 0; |
4419 | |
4420 | /// Iterator range for exploring the call graph. |
4421 | iterator_range<AACallEdgeIterator> optimisticEdgesRange() const { |
4422 | return iterator_range<AACallEdgeIterator>(optimisticEdgesBegin(), |
4423 | optimisticEdgesEnd()); |
4424 | } |
4425 | |
4426 | protected: |
4427 | /// Reference to Attributor needed for GraphTraits implementation. |
4428 | Attributor &A; |
4429 | }; |
4430 | |
4431 | /// An abstract state for querying live call edges. |
4432 | /// This interface uses the Attributor's optimistic liveness |
4433 | /// information to compute the edges that are alive. |
4434 | struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>, |
4435 | AACallGraphNode { |
4436 | using Base = StateWrapper<BooleanState, AbstractAttribute>; |
4437 | |
4438 | AACallEdges(const IRPosition &IRP, Attributor &A) |
4439 | : Base(IRP), AACallGraphNode(A) {} |
4440 | |
4441 | /// Get the optimistic edges. |
4442 | virtual const SetVector<Function *> &getOptimisticEdges() const = 0; |
4443 | |
4444 | /// Is there any call with a unknown callee. |
4445 | virtual bool hasUnknownCallee() const = 0; |
4446 | |
4447 | /// Is there any call with a unknown callee, excluding any inline asm. |
4448 | virtual bool hasNonAsmUnknownCallee() const = 0; |
4449 | |
4450 | /// Iterator for exploring the call graph. |
4451 | AACallEdgeIterator optimisticEdgesBegin() const override { |
4452 | return AACallEdgeIterator(A, getOptimisticEdges().begin()); |
4453 | } |
4454 | |
4455 | /// Iterator for exploring the call graph. |
4456 | AACallEdgeIterator optimisticEdgesEnd() const override { |
4457 | return AACallEdgeIterator(A, getOptimisticEdges().end()); |
4458 | } |
4459 | |
4460 | /// Create an abstract attribute view for the position \p IRP. |
4461 | static AACallEdges &createForPosition(const IRPosition &IRP, Attributor &A); |
4462 | |
4463 | /// See AbstractAttribute::getName() |
4464 | const std::string getName() const override { return "AACallEdges"; } |
4465 | |
4466 | /// See AbstractAttribute::getIdAddr() |
4467 | const char *getIdAddr() const override { return &ID; } |
4468 | |
4469 | /// This function should return true if the type of the \p AA is AACallEdges. |
4470 | static bool classof(const AbstractAttribute *AA) { |
4471 | return (AA->getIdAddr() == &ID); |
4472 | } |
4473 | |
4474 | /// Unique ID (due to the unique address) |
4475 | static const char ID; |
4476 | }; |
4477 | |
4478 | // Synthetic root node for the Attributor's internal call graph. |
4479 | struct AttributorCallGraph : public AACallGraphNode { |
4480 | AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {} |
4481 | virtual ~AttributorCallGraph() {} |
4482 | |
4483 | AACallEdgeIterator optimisticEdgesBegin() const override { |
4484 | return AACallEdgeIterator(A, A.Functions.begin()); |
4485 | } |
4486 | |
4487 | AACallEdgeIterator optimisticEdgesEnd() const override { |
4488 | return AACallEdgeIterator(A, A.Functions.end()); |
4489 | } |
4490 | |
4491 | /// Force populate the entire call graph. |
4492 | void populateAll() const { |
4493 | for (const AACallGraphNode *AA : optimisticEdgesRange()) { |
4494 | // Nothing else to do here. |
4495 | (void)AA; |
4496 | } |
4497 | } |
4498 | |
4499 | void print(); |
4500 | }; |
4501 | |
4502 | template <> struct GraphTraits<AACallGraphNode *> { |
4503 | using NodeRef = AACallGraphNode *; |
4504 | using ChildIteratorType = AACallEdgeIterator; |
4505 | |
4506 | static AACallEdgeIterator child_begin(AACallGraphNode *Node) { |
4507 | return Node->optimisticEdgesBegin(); |
4508 | } |
4509 | |
4510 | static AACallEdgeIterator child_end(AACallGraphNode *Node) { |
4511 | return Node->optimisticEdgesEnd(); |
4512 | } |
4513 | }; |
4514 | |
4515 | template <> |
4516 | struct GraphTraits<AttributorCallGraph *> |
4517 | : public GraphTraits<AACallGraphNode *> { |
4518 | using nodes_iterator = AACallEdgeIterator; |
4519 | |
4520 | static AACallGraphNode *getEntryNode(AttributorCallGraph *G) { |
4521 | return static_cast<AACallGraphNode *>(G); |
4522 | } |
4523 | |
4524 | static AACallEdgeIterator nodes_begin(const AttributorCallGraph *G) { |
4525 | return G->optimisticEdgesBegin(); |
4526 | } |
4527 | |
4528 | static AACallEdgeIterator nodes_end(const AttributorCallGraph *G) { |
4529 | return G->optimisticEdgesEnd(); |
4530 | } |
4531 | }; |
4532 | |
4533 | template <> |
4534 | struct DOTGraphTraits<AttributorCallGraph *> : public DefaultDOTGraphTraits { |
4535 | DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {} |
4536 | |
4537 | std::string getNodeLabel(const AACallGraphNode *Node, |
4538 | const AttributorCallGraph *Graph) { |
4539 | const AACallEdges *AACE = static_cast<const AACallEdges *>(Node); |
4540 | return AACE->getAssociatedFunction()->getName().str(); |
4541 | } |
4542 | |
4543 | static bool isNodeHidden(const AACallGraphNode *Node, |
4544 | const AttributorCallGraph *Graph) { |
4545 | // Hide the synth root. |
4546 | return static_cast<const AACallGraphNode *>(Graph) == Node; |
4547 | } |
4548 | }; |
4549 | |
4550 | struct AAExecutionDomain |
4551 | : public StateWrapper<BooleanState, AbstractAttribute> { |
4552 | using Base = StateWrapper<BooleanState, AbstractAttribute>; |
4553 | AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
4554 | |
4555 | /// Create an abstract attribute view for the position \p IRP. |
4556 | static AAExecutionDomain &createForPosition(const IRPosition &IRP, |
4557 | Attributor &A); |
4558 | |
4559 | /// See AbstractAttribute::getName(). |
4560 | const std::string getName() const override { return "AAExecutionDomain"; } |
4561 | |
4562 | /// See AbstractAttribute::getIdAddr(). |
4563 | const char *getIdAddr() const override { return &ID; } |
4564 | |
4565 | /// Check if an instruction is executed only by the initial thread. |
4566 | virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0; |
4567 | |
4568 | /// Check if a basic block is executed only by the initial thread. |
4569 | virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0; |
4570 | |
4571 | /// This function should return true if the type of the \p AA is |
4572 | /// AAExecutionDomain. |
4573 | static bool classof(const AbstractAttribute *AA) { |
4574 | return (AA->getIdAddr() == &ID); |
4575 | } |
4576 | |
4577 | /// Unique ID (due to the unique address) |
4578 | static const char ID; |
4579 | }; |
4580 | |
4581 | /// An abstract Attribute for computing reachability between functions. |
4582 | struct AAFunctionReachability |
4583 | : public StateWrapper<BooleanState, AbstractAttribute> { |
4584 | using Base = StateWrapper<BooleanState, AbstractAttribute>; |
4585 | |
4586 | AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} |
4587 | |
4588 | /// If the function represented by this possition can reach \p Fn. |
4589 | virtual bool canReach(Attributor &A, Function *Fn) const = 0; |
4590 | |
4591 | /// Can \p CB reach \p Fn |
4592 | virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0; |
4593 | |
4594 | /// Create an abstract attribute view for the position \p IRP. |
4595 | static AAFunctionReachability &createForPosition(const IRPosition &IRP, |
4596 | Attributor &A); |
4597 | |
4598 | /// See AbstractAttribute::getName() |
4599 | const std::string getName() const override { return "AAFuncitonReacability"; } |
4600 | |
4601 | /// See AbstractAttribute::getIdAddr() |
4602 | const char *getIdAddr() const override { return &ID; } |
4603 | |
4604 | /// This function should return true if the type of the \p AA is AACallEdges. |
4605 | static bool classof(const AbstractAttribute *AA) { |
4606 | return (AA->getIdAddr() == &ID); |
4607 | } |
4608 | |
4609 | /// Unique ID (due to the unique address) |
4610 | static const char ID; |
4611 | |
4612 | private: |
4613 | /// Can this function reach a call with unknown calee. |
4614 | virtual bool canReachUnknownCallee() const = 0; |
4615 | }; |
4616 | |
4617 | /// An abstract interface for struct information. |
4618 | struct AAPointerInfo : public AbstractAttribute { |
4619 | AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {} |
4620 | |
4621 | enum AccessKind { |
4622 | AK_READ = 1 << 0, |
4623 | AK_WRITE = 1 << 1, |
4624 | AK_READ_WRITE = AK_READ | AK_WRITE, |
4625 | }; |
4626 | |
4627 | /// An access description. |
4628 | struct Access { |
4629 | Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty) |
4630 | : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {} |
4631 | Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content, |
4632 | AccessKind Kind, Type *Ty) |
4633 | : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), |
4634 | Ty(Ty) {} |
4635 | Access(const Access &Other) |
4636 | : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), |
4637 | Kind(Other.Kind), Ty(Other.Ty) {} |
4638 | Access(const Access &&Other) |
4639 | : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), |
4640 | Kind(Other.Kind), Ty(Other.Ty) {} |
4641 | |
4642 | Access &operator=(const Access &Other) { |
4643 | LocalI = Other.LocalI; |
4644 | RemoteI = Other.RemoteI; |
4645 | Content = Other.Content; |
4646 | Kind = Other.Kind; |
4647 | Ty = Other.Ty; |
4648 | return *this; |
4649 | } |
4650 | bool operator==(const Access &R) const { |
4651 | return LocalI == R.LocalI && RemoteI == R.RemoteI && |
4652 | Content == R.Content && Kind == R.Kind; |
4653 | } |
4654 | bool operator!=(const Access &R) const { return !(*this == R); } |
4655 | |
4656 | Access &operator&=(const Access &R) { |
4657 | assert(RemoteI == R.RemoteI && "Expected same instruction!")(static_cast <bool> (RemoteI == R.RemoteI && "Expected same instruction!" ) ? void (0) : __assert_fail ("RemoteI == R.RemoteI && \"Expected same instruction!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h" , 4657, __extension__ __PRETTY_FUNCTION__)); |
4658 | Content = |
4659 | AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); |
4660 | Kind = AccessKind(Kind | R.Kind); |
4661 | return *this; |
4662 | } |
4663 | |
4664 | /// Return the access kind. |
4665 | AccessKind getKind() const { return Kind; } |
4666 | |
4667 | /// Return true if this is a read access. |
4668 | bool isRead() const { return Kind & AK_READ; } |
4669 | |
4670 | /// Return true if this is a write access. |
4671 | bool isWrite() const { return Kind & AK_WRITE; } |
4672 | |
4673 | /// Return the instruction that causes the access with respect to the local |
4674 | /// scope of the associated attribute. |
4675 | Instruction *getLocalInst() const { return LocalI; } |
4676 | |
4677 | /// Return the actual instruction that causes the access. |
4678 | Instruction *getRemoteInst() const { return RemoteI; } |
4679 | |
4680 | /// Return true if the value written is not known yet. |
4681 | bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); } |
4682 | |
4683 | /// Return true if the value written cannot be determined at all. |
4684 | bool isWrittenValueUnknown() const { |
4685 | return Content.hasValue() && !*Content; |
4686 | } |
4687 | |
4688 | /// Return the type associated with the access, if known. |
4689 | Type *getType() const { return Ty; } |
4690 | |
4691 | /// Return the value writen, if any. As long as |
4692 | /// isWrittenValueYetUndetermined return true this function shall not be |
4693 | /// called. |
4694 | Value *getWrittenValue() const { return *Content; } |
4695 | |
4696 | /// Return the written value which can be `llvm::null` if it is not yet |
4697 | /// determined. |
4698 | Optional<Value *> getContent() const { return Content; } |
4699 | |
4700 | private: |
4701 | /// The instruction responsible for the access with respect to the local |
4702 | /// scope of the associated attribute. |
4703 | Instruction *LocalI; |
4704 | |
4705 | /// The instruction responsible for the access. |
4706 | Instruction *RemoteI; |
4707 | |
4708 | /// The value written, if any. `llvm::none` means "not known yet", `nullptr` |
4709 | /// cannot be determined. |
4710 | Optional<Value *> Content; |
4711 | |
4712 | /// The access kind, e.g., READ, as bitset (could be more than one). |
4713 | AccessKind Kind; |
4714 | |
4715 | /// The type of the content, thus the type read/written, can be null if not |
4716 | /// available. |
4717 | Type *Ty; |
4718 | }; |
4719 | |
4720 | /// Create an abstract attribute view for the position \p IRP. |
4721 | static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A); |
4722 | |
4723 | /// See AbstractAttribute::getName() |
4724 | const std::string getName() const override { return "AAPointerInfo"; } |
4725 | |
4726 | /// See AbstractAttribute::getIdAddr() |
4727 | const char *getIdAddr() const override { return &ID; } |
4728 | |
4729 | /// Call \p CB on all accesses that might interfere with \p LI and return true |
4730 | /// if all such accesses were known and the callback returned true for all of |
4731 | /// them, false otherwise. |
4732 | virtual bool forallInterferingAccesses( |
4733 | LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0; |
4734 | virtual bool forallInterferingAccesses( |
4735 | StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0; |
4736 | |
4737 | /// This function should return true if the type of the \p AA is AAPointerInfo |
4738 | static bool classof(const AbstractAttribute *AA) { |
4739 | return (AA->getIdAddr() == &ID); |
4740 | } |
4741 | |
4742 | /// Unique ID (due to the unique address) |
4743 | static const char ID; |
4744 | }; |
4745 | |
4746 | /// An abstract attribute for getting assumption information. |
4747 | struct AAAssumptionInfo |
4748 | : public StateWrapper<SetState<StringRef>, AbstractAttribute, |
4749 | DenseSet<StringRef>> { |
4750 | using Base = |
4751 | StateWrapper<SetState<StringRef>, AbstractAttribute, DenseSet<StringRef>>; |
4752 | |
4753 | AAAssumptionInfo(const IRPosition &IRP, Attributor &A, |
4754 | const DenseSet<StringRef> &Known) |
4755 | : Base(IRP, Known) {} |
4756 | |
4757 | /// Returns true if the assumption set contains the assumption \p Assumption. |
4758 | virtual bool hasAssumption(const StringRef Assumption) const = 0; |
4759 | |
4760 | /// Create an abstract attribute view for the position \p IRP. |
4761 | static AAAssumptionInfo &createForPosition(const IRPosition &IRP, |
4762 | Attributor &A); |
4763 | |
4764 | /// See AbstractAttribute::getName() |
4765 | const std::string getName() const override { return "AAAssumptionInfo"; } |
4766 | |
4767 | /// See AbstractAttribute::getIdAddr() |
4768 | const char *getIdAddr() const override { return &ID; } |
4769 | |
4770 | /// This function should return true if the type of the \p AA is |
4771 | /// AAAssumptionInfo |
4772 | static bool classof(const AbstractAttribute *AA) { |
4773 | return (AA->getIdAddr() == &ID); |
4774 | } |
4775 | |
4776 | /// Unique ID (due to the unique address) |
4777 | static const char ID; |
4778 | }; |
4779 | |
4780 | raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &); |
4781 | |
/// Run options, used by the pass manager. The values are bit flags and can
/// be combined.
enum AttributorRunOption {
  /// No bit set.
  NONE = 0,
  /// Bit 0.
  MODULE = 1 << 0,
  /// Bit 1.
  CGSCC = 1 << 1,
  /// Both MODULE and CGSCC.
  ALL = MODULE | CGSCC,
};
4789 | |
4790 | } // end namespace llvm |
4791 | |
4792 | #endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H |