Bug Summary

File: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Warning: line 3777, column 7
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name OpenMPOpt.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/IPO -I /build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO -I include -I /build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem 
/usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/build-llvm -ferror-limit 19 -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-11-10-160236-22541-1 -x c++ /build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

1//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// OpenMP specific optimizations:
10//
11// - Deduplication of runtime calls, e.g., omp_get_thread_num.
12// - Replacing globalized device memory with stack memory.
13// - Replacing globalized device memory with shared memory.
14// - Parallel region merging.
15// - Transforming generic-mode device kernels to SPMD mode.
16// - Specializing the state machine for generic-mode device kernels.
17//
18//===----------------------------------------------------------------------===//
19
20#include "llvm/Transforms/IPO/OpenMPOpt.h"
21
22#include "llvm/ADT/EnumeratedArray.h"
23#include "llvm/ADT/PostOrderIterator.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/Analysis/CallGraph.h"
27#include "llvm/Analysis/CallGraphSCCPass.h"
28#include "llvm/Analysis/OptimizationRemarkEmitter.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/Frontend/OpenMP/OMPConstants.h"
31#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32#include "llvm/IR/Assumptions.h"
33#include "llvm/IR/DiagnosticInfo.h"
34#include "llvm/IR/GlobalValue.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/IntrinsicInst.h"
37#include "llvm/IR/IntrinsicsAMDGPU.h"
38#include "llvm/IR/IntrinsicsNVPTX.h"
39#include "llvm/InitializePasses.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Transforms/IPO.h"
42#include "llvm/Transforms/IPO/Attributor.h"
43#include "llvm/Transforms/Utils/BasicBlockUtils.h"
44#include "llvm/Transforms/Utils/CallGraphUpdater.h"
45#include "llvm/Transforms/Utils/CodeExtractor.h"
46
47#include <algorithm>
48
49using namespace llvm;
50using namespace omp;
51
52#define DEBUG_TYPE "openmp-opt"
53
// Command-line options controlling the OpenMP optimizations in this file.
// Every option below is declared cl::Hidden. All boolean options default to
// false; SetFixpointIterations defaults to 256.

// Switch that disables the OpenMP specific optimizations.
54static cl::opt<bool> DisableOpenMPOptimizations(
55 "openmp-opt-disable", cl::ZeroOrMore,
56 cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
57 cl::init(false));
58
// Opt-in switch for the parallel region merging optimization.
59static cl::opt<bool> EnableParallelRegionMerging(
60 "openmp-opt-enable-merging", cl::ZeroOrMore,
61 cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
62 cl::init(false));
63
// Switch that disables function internalization.
64static cl::opt<bool>
65 DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
66 cl::desc("Disable function internalization."),
67 cl::Hidden, cl::init(false));
68
// The next two options carry no cl::desc. Judging by their names they print
// ICV values and GPU kernels respectively -- confirm against their use sites.
69static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
70 cl::Hidden);
71static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
72 cl::init(false), cl::Hidden);
73
// Work-in-progress feature, as stated by its own description string.
74static cl::opt<bool> HideMemoryTransferLatency(
75 "openmp-hide-memory-transfer-latency",
76 cl::desc("[WIP] Tries to hide the latency of host to device memory"
77 " transfers"),
78 cl::Hidden, cl::init(false));
79
// Per-optimization kill switches: deglobalization, SPMD-ization, folding and
// the state machine rewrite can each be turned off individually.
80static cl::opt<bool> DisableOpenMPOptDeglobalization(
81 "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
82 cl::desc("Disable OpenMP optimizations involving deglobalization."),
83 cl::Hidden, cl::init(false));
84
85static cl::opt<bool> DisableOpenMPOptSPMDization(
86 "openmp-opt-disable-spmdization", cl::ZeroOrMore,
87 cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
88 cl::Hidden, cl::init(false));
89
90static cl::opt<bool> DisableOpenMPOptFolding(
91 "openmp-opt-disable-folding", cl::ZeroOrMore,
92 cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
93 cl::init(false));
94
95static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
96 "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
97 cl::desc("Disable OpenMP optimizations that replace the state machine."),
98 cl::Hidden, cl::init(false));
99
// Debugging aid: print the module once the OpenMP optimizations are done.
100static cl::opt<bool> PrintModuleAfterOptimizations(
101 "openmp-opt-print-module", cl::ZeroOrMore,
102 cl::desc("Print the current module after OpenMP optimizations."),
103 cl::Hidden, cl::init(false));
104
// NOTE(review): the description string below misspells "applicable"; it is
// user-visible text and is left untouched here.
105static cl::opt<bool> AlwaysInlineDeviceFunctions(
106 "openmp-opt-inline-device", cl::ZeroOrMore,
107 cl::desc("Inline all applicible functions on the device."), cl::Hidden,
108 cl::init(false));
109
// Opt-in switch for more verbose remarks.
110static cl::opt<bool>
111 EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
112 cl::desc("Enables more verbose remarks."), cl::Hidden,
113 cl::init(false));
114
// Upper bound on the number of attributor iterations (default 256).
115static cl::opt<unsigned>
116 SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
117 cl::desc("Maximal number of attributor iterations."),
118 cl::init(256));
119
120STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
121 "Number of OpenMP runtime calls deduplicated");
122STATISTIC(NumOpenMPParallelRegionsDeleted,
123 "Number of OpenMP parallel regions deleted");
124STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
125 "Number of OpenMP runtime functions identified");
126STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
127 "Number of OpenMP runtime function uses identified");
128STATISTIC(NumOpenMPTargetRegionKernels,
129 "Number of OpenMP target region entry points (=kernels) identified");
130STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
131 "Number of OpenMP target region entry points (=kernels) executed in "
132 "SPMD-mode instead of generic-mode");
133STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
134 "Number of OpenMP target region entry points (=kernels) executed in "
135 "generic-mode without a state machines");
136STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
137 "Number of OpenMP target region entry points (=kernels) executed in "
138 "generic-mode with customized state machines with fallback");
139STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
140 "Number of OpenMP target region entry points (=kernels) executed in "
141 "generic-mode with customized state machines without fallback");
142STATISTIC(
143 NumOpenMPParallelRegionsReplacedInGPUStateMachine,
144 "Number of OpenMP parallel regions replaced with ID in GPU state machines");
145STATISTIC(NumOpenMPParallelRegionsMerged,
146 "Number of OpenMP parallel regions merged");
147STATISTIC(NumBytesMovedToSharedMemory,
148 "Amount of memory pushed to shared memory");
149
150#if !defined(NDEBUG)
151static constexpr auto TAG = "[" DEBUG_TYPE "]";
152#endif
153
154namespace {
155
/// Named address-space identifiers with fixed numeric values.
/// NOTE(review): the values presumably mirror the address-space numbering of
/// the GPU targets (this file includes the AMDGPU and NVPTX intrinsic headers)
/// -- confirm before changing any of them. The value 2 has no enumerator;
/// nothing in view says why.
156enum class AddressSpace : unsigned {
157 Generic = 0,
158 Global = 1,
159 Shared = 3,
160 Constant = 4,
161 Local = 5,
162};
163
164struct AAHeapToShared;
165
166struct AAICVTracker;
167
168/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
169/// Attributor runs.
///
/// Construction initializes the OpenMPIRBuilder and then fills the runtime
/// function table (RFIs) and the internal control variable table (ICVs) by
/// expanding the entries of OMPKinds.def.
170struct OMPInformationCache : public InformationCache {
 /// Forwards \p M, \p AG, \p Allocator and the address of \p CGSCC to
 /// InformationCache and keeps a reference to \p Kernels.
 ///
 /// OMPBuilder is initialized first because the table initializers below read
 /// types out of it.
171 OMPInformationCache(Module &M, AnalysisGetter &AG,
172 BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
173 SmallPtrSetImpl<Kernel> &Kernels)
174 : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
175 Kernels(Kernels) {
176
177 OMPBuilder.initialize();
178 initializeRuntimeFunctions();
179 initializeInternalControlVars();
180 }
181
182 /// Generic information that describes an internal control variable.
183 struct InternalControlVarInfo {
184 /// The kind, as described by InternalControlVar enum.
185 InternalControlVar Kind;
186
187 /// The name of the ICV.
188 StringRef Name;
189
190 /// Environment variable associated with this ICV.
191 StringRef EnvVarName;
192
193 /// Initial value kind.
194 ICVInitValue InitKind;
195
196 /// Initial value.
197 ConstantInt *InitValue;
198
199 /// Setter RTL function associated with this ICV.
200 RuntimeFunction Setter;
201
202 /// Getter RTL function associated with this ICV.
203 RuntimeFunction Getter;
204
205 /// RTL Function corresponding to the override clause of this ICV
206 RuntimeFunction Clause;
207 };
208
209 /// Generic information that describes a runtime function
210 struct RuntimeFunctionInfo {
211
212 /// The kind, as described by the RuntimeFunction enum.
213 RuntimeFunction Kind;
214
215 /// The name of the function.
216 StringRef Name;
217
218 /// Flag to indicate a variadic function.
219 bool IsVarArg;
220
221 /// The return type of the function.
222 Type *ReturnType;
223
224 /// The argument types of the function.
225 SmallVector<Type *, 8> ArgumentTypes;
226
227 /// The declaration if available.
228 Function *Declaration = nullptr;
229
230 /// Uses of this runtime function per function containing the use.
231 using UseVector = SmallVector<Use *, 16>;
232
233 /// Clear UsesMap for runtime function.
234 void clearUsesMap() { UsesMap.clear(); }
235
236 /// Boolean conversion that is true if the runtime function was found.
 /// NOTE(review): the conversion is not marked explicit, so it also applies
 /// in implicit-conversion contexts.
237 operator bool() const { return Declaration; }
238
239 /// Return the vector of uses in function \p F.
 /// An empty vector is created and stored for \p F on first access.
240 UseVector &getOrCreateUseVector(Function *F) {
241 std::shared_ptr<UseVector> &UV = UsesMap[F];
242 if (!UV)
243 UV = std::make_shared<UseVector>();
244 return *UV;
245 }
246
247 /// Return the vector of uses in function \p F or `nullptr` if there are
248 /// none.
249 const UseVector *getUseVector(Function &F) const {
250 auto I = UsesMap.find(&F);
251 if (I != UsesMap.end())
252 return I->second.get();
253 return nullptr;
254 }
255
256 /// Return how many functions contain uses of this runtime function.
257 size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
258
259 /// Return the number of arguments (or the minimal number for variadic
260 /// functions).
261 size_t getNumArgs() const { return ArgumentTypes.size(); }
262
263 /// Run the callback \p CB on each use and forget the use if the result is
264 /// true. The callback will be fed the function in which the use was
265 /// encountered as second argument.
266 void foreachUse(SmallVectorImpl<Function *> &SCC,
267 function_ref<bool(Use &, Function &)> CB) {
268 for (Function *F : SCC)
269 foreachUse(CB, F);
270 }
271
272 /// Run the callback \p CB on each use within the function \p F and forget
273 /// the use if the result is true.
 ///
 /// An empty use vector is created for \p F if none exists yet. Forgotten
 /// uses are removed by overwriting them with the last element, so the
 /// relative order of the remaining uses is not preserved.
 /// NOTE(review): \p F is dereferenced when invoking \p CB, so it must be
 /// non-null here, even though collectUses() records non-instruction uses
 /// under a null function key.
274 void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
275 SmallVector<unsigned, 8> ToBeDeleted;
 // The vector was just constructed, so this clear() has nothing to remove.
276 ToBeDeleted.clear();
277
278 unsigned Idx = 0;
279 UseVector &UV = getOrCreateUseVector(F);
280
 // First pass: only record which indices to drop; UV is not modified while
 // it is being iterated.
281 for (Use *U : UV) {
282 if (CB(*U, *F))
283 ToBeDeleted.push_back(Idx);
284 ++Idx;
285 }
286
287 // Remove the to-be-deleted indices in reverse order as prior
288 // modifications will not modify the smaller indices.
289 while (!ToBeDeleted.empty()) {
 // This Idx shadows the counter declared above.
290 unsigned Idx = ToBeDeleted.pop_back_val();
291 UV[Idx] = UV.back();
292 UV.pop_back();
293 }
294 }
295
296 private:
297 /// Map from functions to all uses of this runtime function contained in
298 /// them.
299 DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
300
301 public:
302 /// Iterators for the uses of this runtime function.
303 decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
304 decltype(UsesMap)::iterator end() { return UsesMap.end(); }
305 };
306
307 /// An OpenMP-IR-Builder instance
308 OpenMPIRBuilder OMPBuilder;
309
310 /// Map from runtime function kind to the runtime function description.
311 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
312 RuntimeFunction::OMPRTL___last>
313 RFIs;
314
315 /// Map from function declarations/definitions to their runtime enum type.
316 DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
317
318 /// Map from ICV kind to the ICV description.
319 EnumeratedArray<InternalControlVarInfo, InternalControlVar,
320 InternalControlVar::ICV___last>
321 ICVs;
322
323 /// Helper to initialize all internal control variable information for those
324 /// defined in OMPKinds.def.
 ///
 /// The three macros below are expanded by the OMPKinds.def include at the end
 /// of the function: ICV_RT_SET and ICV_RT_GET record the setter and getter
 /// runtime functions, ICV_DATA_ENV records name, kind, environment variable
 /// and the initial value.
325 void initializeInternalControlVars() {
326#define ICV_RT_SET(_Name, RTL) \
327 { \
328 auto &ICV = ICVs[_Name]; \
329 ICV.Setter = RTL; \
330 }
331#define ICV_RT_GET(Name, RTL) \
332 { \
333 auto &ICV = ICVs[Name]; \
334 ICV.Getter = RTL; \
335 }
 // Note that the ICV_LAST case below leaves ICV.InitValue untouched.
336#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \
337 { \
338 auto &ICV = ICVs[Enum]; \
339 ICV.Name = _Name; \
340 ICV.Kind = Enum; \
341 ICV.InitKind = Init; \
342 ICV.EnvVarName = _EnvVarName; \
343 switch (ICV.InitKind) { \
344 case ICV_IMPLEMENTATION_DEFINED: \
345 ICV.InitValue = nullptr; \
346 break; \
347 case ICV_ZERO: \
348 ICV.InitValue = ConstantInt::get( \
349 Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
350 break; \
351 case ICV_FALSE: \
352 ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
353 break; \
354 case ICV_LAST: \
355 break; \
356 } \
357 }
358#include "llvm/Frontend/OpenMP/OMPKinds.def"
359 }
360
361 /// Returns true if the function declaration \p F matches the runtime
362 /// function types, that is, return type \p RTFRetType, and argument types
363 /// \p RTFArgTypes.
 /// A null \p F never matches. Types are compared by pointer identity and the
 /// argument count must be equal.
364 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
365 SmallVector<Type *, 8> &RTFArgTypes) {
366 // TODO: We should output information to the user (under debug output
367 // and via remarks).
368
369 if (!F)
370 return false;
371 if (F->getReturnType() != RTFRetType)
372 return false;
373 if (F->arg_size() != RTFArgTypes.size())
374 return false;
375
 // The sizes were checked above, so the iterator stays in range while the
 // arguments are walked in lockstep.
376 auto *RTFTyIt = RTFArgTypes.begin();
377 for (Argument &Arg : F->args()) {
378 if (Arg.getType() != *RTFTyIt)
379 return false;
380
381 ++RTFTyIt;
382 }
383
384 return true;
385 }
386
387 // Helper to collect all uses of the declaration in the UsesMap.
 //
 // Returns the number of uses recorded, which is 0 when \p RFI has no
 // declaration. Uses by instructions are recorded only if the instruction's
 // function is in ModuleSlice; uses by anything other than an instruction are
 // recorded under a null function key. When \p CollectStats is set, the
 // runtime-function statistics are updated as well.
388 unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
389 unsigned NumUses = 0;
390 if (!RFI.Declaration)
391 return NumUses;
392 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
393
394 if (CollectStats) {
395 NumOpenMPRuntimeFunctionsIdentified += 1;
396 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
397 }
398
399 // TODO: We directly convert uses into proper calls and unknown uses.
400 for (Use &U : RFI.Declaration->uses()) {
401 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
402 if (ModuleSlice.count(UserI->getFunction())) {
403 RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
404 ++NumUses;
405 }
406 } else {
407 RFI.getOrCreateUseVector(nullptr).push_back(&U);
408 ++NumUses;
409 }
410 }
411 return NumUses;
412 }
413
414 // Helper function to recollect uses of a runtime function.
 // Drops the recorded uses for \p RTF and collects them again without
 // touching the statistics.
415 void recollectUsesForFunction(RuntimeFunction RTF) {
416 auto &RFI = RFIs[RTF];
417 RFI.clearUsesMap();
418 collectUses(RFI, /*CollectStats*/ false);
419 }
420
421 // Helper function to recollect uses of all runtime functions.
 // NOTE(review): Idx is a signed int compared against RFIs.size(); presumably
 // size() returns an unsigned type -- confirm and consider matching the types.
422 void recollectUses() {
423 for (int Idx = 0; Idx < RFIs.size(); ++Idx)
424 recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
425 }
426
427 /// Helper to initialize all runtime function information for those defined
428 /// in OMPKinds.def.
429 void initializeRuntimeFunctions() {
 // The module is taken from the first function in ModuleSlice.
 // NOTE(review): ModuleSlice.begin() is dereferenced unconditionally, so
 // this assumes ModuleSlice is non-empty -- confirm callers guarantee that.
430 Module &M = *((*ModuleSlice.begin())->getParent());
431
432 // Helper macros for handling __VA_ARGS__ in OMP_RTL
 // Each one declares local names for types held by OMPBuilder; the (void)
 // casts are applied to every declared name.
433#define OMP_TYPE(VarName, ...) \
434 Type *VarName = OMPBuilder.VarName; \
435 (void)VarName;
436
437#define OMP_ARRAY_TYPE(VarName, ...) \
438 ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
439 (void)VarName##Ty; \
440 PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
441 (void)VarName##PtrTy;
442
443#define OMP_FUNCTION_TYPE(VarName, ...) \
444 FunctionType *VarName = OMPBuilder.VarName; \
445 (void)VarName; \
446 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
447 (void)VarName##Ptr;
448
449#define OMP_STRUCT_TYPE(VarName, ...) \
450 StructType *VarName = OMPBuilder.VarName; \
451 (void)VarName; \
452 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
453 (void)VarName##Ptr;
454
 // Expanded by the OMPKinds.def include below. The function is looked up by
 // name in the module and the lookup result is inserted into RTLFunctions
 // before the type check, so a null or mismatching result is recorded there
 // too. Only if the declaration matches the expected signature is it
 // registered in RuntimeFunctionIDMap and RFIs, stripped of NoInline, and its
 // uses collected.
455#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
456 { \
457 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
458 Function *F = M.getFunction(_Name); \
459 RTLFunctions.insert(F); \
460 if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
461 RuntimeFunctionIDMap[F] = _Enum; \
462 F->removeFnAttr(Attribute::NoInline); \
463 auto &RFI = RFIs[_Enum]; \
464 RFI.Kind = _Enum; \
465 RFI.Name = _Name; \
466 RFI.IsVarArg = _IsVarArg; \
467 RFI.ReturnType = OMPBuilder._ReturnType; \
468 RFI.ArgumentTypes = std::move(ArgsTypes); \
469 RFI.Declaration = F; \
470 unsigned NumUses = collectUses(RFI); \
471 (void)NumUses; \
472 LLVM_DEBUG({ \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
473 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
474 << " found\n"; \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
475 if (RFI.Declaration) \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
476 dbgs() << TAG << "-> got " << NumUses << " uses in " \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
477 << RFI.getNumFunctionsWithUses() \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
478 << " different functions.\n"; \do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
479 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { dbgs() << TAG << RFI.Name <<
(RFI.Declaration ? "" : " not") << " found\n"; if (RFI
.Declaration) dbgs() << TAG << "-> got " <<
NumUses << " uses in " << RFI.getNumFunctionsWithUses
() << " different functions.\n"; }; } } while (false)
; \
480 } \
481 }
482#include "llvm/Frontend/OpenMP/OMPKinds.def"
483
484 // TODO: We should attach the attributes defined in OMPKinds.def.
485 }
486
487 /// Collection of known kernels (\see Kernel) in the module.
488 SmallPtrSetImpl<Kernel> &Kernels;
489
490 /// Collection of known OpenMP runtime functions.
491 DenseSet<const Function *> RTLFunctions;
492};
493
/// A BooleanState combined with a SetVector of elements of type \p Ty.
///
/// When \p InsertInvalidates is true, every call to insert() first moves the
/// boolean state to its pessimistic fixpoint, whether or not the element was
/// already present. When it is false, inserting leaves the boolean state alone.
494template <typename Ty, bool InsertInvalidates = true>
495struct BooleanStateWithSetVector : public BooleanState {
 /// Return true if \p Elem is in the set.
496 bool contains(const Ty &Elem) const { return Set.contains(Elem); }
 /// Insert \p Elem and return the result of the underlying set insertion.
497 bool insert(const Ty &Elem) {
498 if (InsertInvalidates)
499 BooleanState::indicatePessimisticFixpoint();
500 return Set.insert(Elem);
501 }
502
 /// Positional access into the underlying set.
503 const Ty &operator[](int Idx) const { return Set[Idx]; }
 /// Two states are equal if both the boolean state and the sets are equal.
504 bool operator==(const BooleanStateWithSetVector &RHS) const {
505 return BooleanState::operator==(RHS) && Set == RHS.Set;
506 }
507 bool operator!=(const BooleanStateWithSetVector &RHS) const {
508 return !(*this == RHS);
509 }
510
511 bool empty() const { return Set.empty(); }
512 size_t size() const { return Set.size(); }
513
514 /// "Clamp" this state with \p RHS.
 /// The boolean part is combined through BooleanState::operator^= and all
 /// elements of \p RHS are added to this set. The elements are inserted into
 /// the set directly, so InsertInvalidates plays no role here.
515 BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
516 BooleanState::operator^=(RHS);
517 Set.insert(RHS.Set.begin(), RHS.Set.end());
518 return *this;
519 }
520
521private:
522 /// A set to keep track of elements.
523 SetVector<Ty> Set;
524
525public:
 /// Iteration over the elements of the set.
526 typename decltype(Set)::iterator begin() { return Set.begin(); }
527 typename decltype(Set)::iterator end() { return Set.end(); }
528 typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
529 typename decltype(Set)::const_iterator end() const { return Set.end(); }
530};
531
/// Convenience alias: a BooleanStateWithSetVector whose elements are pointers
/// to \p Ty.
532template <typename Ty, bool InsertInvalidates = true>
533using BooleanStateWithPtrSetVector =
534 BooleanStateWithSetVector<Ty *, InsertInvalidates>;
535
536struct KernelInfoState : AbstractState {
537 /// Flag to track if we reached a fixpoint.
538 bool IsAtFixpoint = false;
539
540 /// The parallel regions (identified by the outlined parallel functions) that
541 /// can be reached from the associated function.
542 BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
543 ReachedKnownParallelRegions;
544
545 /// State to track what parallel region we might reach.
546 BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
547
548 /// State to track if we are in SPMD-mode, assumed or known, and why we decided
549 /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
550 /// false.
551 BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
552
553 /// The __kmpc_target_init call in this kernel, if any. If we find more than
554 /// one we abort as the kernel is malformed.
555 CallBase *KernelInitCB = nullptr;
556
557 /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
558 /// one we abort as the kernel is malformed.
559 CallBase *KernelDeinitCB = nullptr;
560
561 /// Flag to indicate if the associated function is a kernel entry.
562 bool IsKernelEntry = false;
563
564 /// State to track what kernel entries can reach the associated function.
565 BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;
566
567 /// State to indicate if we can track the parallel level of the associated
568 /// function. We will give up tracking if we encounter an unknown caller or the
569 /// caller is __kmpc_parallel_51.
570 BooleanStateWithSetVector<uint8_t> ParallelLevels;
571
572 /// Abstract State interface
573 ///{
574
575 KernelInfoState() {}
576 KernelInfoState(bool BestState) {
577 if (!BestState)
578 indicatePessimisticFixpoint();
579 }
580
581 /// See AbstractState::isValidState(...)
582 bool isValidState() const override { return true; }
583
584 /// See AbstractState::isAtFixpoint(...)
585 bool isAtFixpoint() const override { return IsAtFixpoint; }
586
587 /// See AbstractState::indicatePessimisticFixpoint(...)
588 ChangeStatus indicatePessimisticFixpoint() override {
589 IsAtFixpoint = true;
590 ReachingKernelEntries.indicatePessimisticFixpoint();
591 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
592 ReachedKnownParallelRegions.indicatePessimisticFixpoint();
593 ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
594 return ChangeStatus::CHANGED;
595 }
596
597 /// See AbstractState::indicateOptimisticFixpoint(...)
598 ChangeStatus indicateOptimisticFixpoint() override {
599 IsAtFixpoint = true;
600 ReachingKernelEntries.indicateOptimisticFixpoint();
601 SPMDCompatibilityTracker.indicateOptimisticFixpoint();
602 ReachedKnownParallelRegions.indicateOptimisticFixpoint();
603 ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
604 return ChangeStatus::UNCHANGED;
605 }
606
607 /// Return the assumed state
608 KernelInfoState &getAssumed() { return *this; }
609 const KernelInfoState &getAssumed() const { return *this; }
610
611 bool operator==(const KernelInfoState &RHS) const {
612 if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
613 return false;
614 if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
615 return false;
616 if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
617 return false;
618 if (ReachingKernelEntries != RHS.ReachingKernelEntries)
619 return false;
620 return true;
621 }
622
623 /// Returns true if this kernel contains any OpenMP parallel regions.
624 bool mayContainParallelRegion() {
625 return !ReachedKnownParallelRegions.empty() ||
626 !ReachedUnknownParallelRegions.empty();
627 }
628
629 /// Return empty set as the best state of potential values.
630 static KernelInfoState getBestState() { return KernelInfoState(true); }
631
632 static KernelInfoState getBestState(KernelInfoState &KIS) {
633 return getBestState();
634 }
635
636 /// Return full set as the worst state of potential values.
637 static KernelInfoState getWorstState() { return KernelInfoState(false); }
638
639 /// "Clamp" this state with \p KIS.
640 KernelInfoState operator^=(const KernelInfoState &KIS) {
641 // Do not merge two different _init and _deinit call sites.
642 if (KIS.KernelInitCB) {
643 if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
644 llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt "
"assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 645)
645 "assumptions.")::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt "
"assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 645)
;
646 KernelInitCB = KIS.KernelInitCB;
647 }
648 if (KIS.KernelDeinitCB) {
649 if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
650 llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt "
"assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 651)
651 "assumptions.")::llvm::llvm_unreachable_internal("Kernel that calls another kernel violates OpenMP-Opt "
"assumptions.", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 651)
;
652 KernelDeinitCB = KIS.KernelDeinitCB;
653 }
654 SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
655 ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
656 ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
657 return *this;
658 }
659
660 KernelInfoState operator&=(const KernelInfoState &KIS) {
661 return (*this ^= KIS);
662 }
663
664 ///}
665};
666
667/// Used to map the values physically (in the IR) stored in an offload
668/// array, to a vector in memory.
669struct OffloadArray {
670 /// Physical array (in the IR).
671 AllocaInst *Array = nullptr;
672 /// Mapped values.
673 SmallVector<Value *, 8> StoredValues;
674 /// Last stores made in the offload array.
675 SmallVector<StoreInst *, 8> LastAccesses;
676
677 OffloadArray() = default;
678
679 /// Initializes the OffloadArray with the values stored in \p Array before
680 /// instruction \p Before is reached. Returns false if the initialization
681 /// fails.
682 /// This MUST be used immediately after the construction of the object.
683 bool initialize(AllocaInst &Array, Instruction &Before) {
684 if (!Array.getAllocatedType()->isArrayTy())
685 return false;
686
687 if (!getValues(Array, Before))
688 return false;
689
690 this->Array = &Array;
691 return true;
692 }
693
694 static const unsigned DeviceIDArgNum = 1;
695 static const unsigned BasePtrsArgNum = 3;
696 static const unsigned PtrsArgNum = 4;
697 static const unsigned SizesArgNum = 5;
698
699private:
700 /// Traverses the BasicBlock where \p Array is, collecting the stores made to
701 /// \p Array, leaving StoredValues with the values stored before the
702 /// instruction \p Before is reached.
703 bool getValues(AllocaInst &Array, Instruction &Before) {
704 // Initialize container.
705 const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
706 StoredValues.assign(NumValues, nullptr);
707 LastAccesses.assign(NumValues, nullptr);
708
709 // TODO: This assumes the instruction \p Before is in the same
710 // BasicBlock as Array. Make it general, for any control flow graph.
711 BasicBlock *BB = Array.getParent();
712 if (BB != Before.getParent())
713 return false;
714
715 const DataLayout &DL = Array.getModule()->getDataLayout();
716 const unsigned int PointerSize = DL.getPointerSize();
717
718 for (Instruction &I : *BB) {
719 if (&I == &Before)
720 break;
721
722 if (!isa<StoreInst>(&I))
723 continue;
724
725 auto *S = cast<StoreInst>(&I);
726 int64_t Offset = -1;
727 auto *Dst =
728 GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
729 if (Dst == &Array) {
730 int64_t Idx = Offset / PointerSize;
731 StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
732 LastAccesses[Idx] = S;
733 }
734 }
735
736 return isFilled();
737 }
738
739 /// Returns true if all values in StoredValues and
740 /// LastAccesses are not nullptrs.
741 bool isFilled() {
742 const unsigned NumValues = StoredValues.size();
743 for (unsigned I = 0; I < NumValues; ++I) {
744 if (!StoredValues[I] || !LastAccesses[I])
745 return false;
746 }
747
748 return true;
749 }
750};
751
752struct OpenMPOpt {
753
754 using OptimizationRemarkGetter =
755 function_ref<OptimizationRemarkEmitter &(Function *)>;
756
/// Bind the optimizer to \p SCC and the helper objects it works with. The
/// module reference M is taken from the parent of the first function in
/// \p SCC, so \p SCC is dereferenced here and must not be empty.
757 OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
758 OptimizationRemarkGetter OREGetter,
759 OMPInformationCache &OMPInfoCache, Attributor &A)
760 : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
761 OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
762
763 /// Check if any remarks are enabled for openmp-opt
764 bool remarksEnabled() {
765 auto &Ctx = M.getContext();
766 return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE"openmp-opt");
767 }
768
769 /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
770 bool run(bool IsModulePass) {
771 if (SCC.empty())
772 return false;
773
774 bool Changed = false;
775
776 LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Run on SCC with "
<< SCC.size() << " functions in a slice with " <<
OMPInfoCache.ModuleSlice.size() << " functions\n"; } }
while (false)
777 << " functions in a slice with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Run on SCC with "
<< SCC.size() << " functions in a slice with " <<
OMPInfoCache.ModuleSlice.size() << " functions\n"; } }
while (false)
778 << OMPInfoCache.ModuleSlice.size() << " functions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Run on SCC with "
<< SCC.size() << " functions in a slice with " <<
OMPInfoCache.ModuleSlice.size() << " functions\n"; } }
while (false)
;
779
780 if (IsModulePass) {
781 Changed |= runAttributor(IsModulePass);
782
783 // Recollect uses, in case Attributor deleted any.
784 OMPInfoCache.recollectUses();
785
786 // TODO: This should be folded into buildCustomStateMachine.
787 Changed |= rewriteDeviceCodeStateMachine();
788
789 if (remarksEnabled())
790 analysisGlobalization();
791 } else {
792 if (PrintICVValues)
793 printICVs();
794 if (PrintOpenMPKernels)
795 printKernels();
796
797 Changed |= runAttributor(IsModulePass);
798
799 // Recollect uses, in case Attributor deleted any.
800 OMPInfoCache.recollectUses();
801
802 Changed |= deleteParallelRegions();
803
804 if (HideMemoryTransferLatency)
805 Changed |= hideMemTransfersLatency();
806 Changed |= deduplicateRuntimeCalls();
807 if (EnableParallelRegionMerging) {
808 if (mergeParallelRegions()) {
809 deduplicateRuntimeCalls();
810 Changed = true;
811 }
812 }
813 }
814
815 return Changed;
816 }
817
818 /// Print initial ICV values for testing.
819 /// FIXME: This should be done from the Attributor once it is added.
820 void printICVs() const {
821 InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
822 ICV_proc_bind};
823
824 for (Function *F : OMPInfoCache.ModuleSlice) {
825 for (auto ICV : ICVs) {
826 auto ICVInfo = OMPInfoCache.ICVs[ICV];
827 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
828 return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
829 << " Value: "
830 << (ICVInfo.InitValue
831 ? toString(ICVInfo.InitValue->getValue(), 10, true)
832 : "IMPLEMENTATION_DEFINED");
833 };
834
835 emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
836 }
837 }
838 }
839
840 /// Print OpenMP GPU kernels for testing.
841 void printKernels() const {
842 for (Function *F : SCC) {
843 if (!OMPInfoCache.Kernels.count(F))
844 continue;
845
846 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
847 return ORA << "OpenMP GPU kernel "
848 << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
849 };
850
851 emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
852 }
853 }
854
855 /// Return the call if \p U is a callee use in a regular call. If \p RFI is
856 /// given it has to be the callee or a nullptr is returned.
857 static CallInst *getCallIfRegularCall(
858 Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
859 CallInst *CI = dyn_cast<CallInst>(U.getUser());
860 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
861 (!RFI ||
862 (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
863 return CI;
864 return nullptr;
865 }
866
867 /// Return the call if \p V is a regular call. If \p RFI is given it has to be
868 /// the callee or a nullptr is returned.
869 static CallInst *getCallIfRegularCall(
870 Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
871 CallInst *CI = dyn_cast<CallInst>(&V);
872 if (CI && !CI->hasOperandBundles() &&
873 (!RFI ||
874 (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
875 return CI;
876 return nullptr;
877 }
878
879private:
880 /// Merge parallel regions when it is safe.
881 bool mergeParallelRegions() {
882 const unsigned CallbackCalleeOperand = 2;
883 const unsigned CallbackFirstArgOperand = 3;
884 using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
885
886 // Check if there are any __kmpc_fork_call calls to merge.
887 OMPInformationCache::RuntimeFunctionInfo &RFI =
888 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
889
890 if (!RFI.Declaration)
891 return false;
892
893 // Unmergable calls that prevent merging a parallel region.
894 OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
895 OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
896 OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
897 };
898
899 bool Changed = false;
900 LoopInfo *LI = nullptr;
901 DominatorTree *DT = nullptr;
902
903 SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
904
905 BasicBlock *StartBB = nullptr, *EndBB = nullptr;
906 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
907 BasicBlock &ContinuationIP) {
908 BasicBlock *CGStartBB = CodeGenIP.getBlock();
909 BasicBlock *CGEndBB =
910 SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
911 assert(StartBB != nullptr && "StartBB should not be null")(static_cast <bool> (StartBB != nullptr && "StartBB should not be null"
) ? void (0) : __assert_fail ("StartBB != nullptr && \"StartBB should not be null\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 911, __extension__ __PRETTY_FUNCTION__))
;
912 CGStartBB->getTerminator()->setSuccessor(0, StartBB);
913 assert(EndBB != nullptr && "EndBB should not be null")(static_cast <bool> (EndBB != nullptr && "EndBB should not be null"
) ? void (0) : __assert_fail ("EndBB != nullptr && \"EndBB should not be null\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 913, __extension__ __PRETTY_FUNCTION__))
;
914 EndBB->getTerminator()->setSuccessor(0, CGEndBB);
915 };
916
917 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
918 Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
919 ReplacementValue = &Inner;
920 return CodeGenIP;
921 };
922
923 auto FiniCB = [&](InsertPointTy CodeGenIP) {};
924
925 /// Create a sequential execution region within a merged parallel region,
926 /// encapsulated in a master construct with a barrier for synchronization.
927 auto CreateSequentialRegion = [&](Function *OuterFn,
928 BasicBlock *OuterPredBB,
929 Instruction *SeqStartI,
930 Instruction *SeqEndI) {
931 // Isolate the instructions of the sequential region to a separate
932 // block.
933 BasicBlock *ParentBB = SeqStartI->getParent();
934 BasicBlock *SeqEndBB =
935 SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
936 BasicBlock *SeqAfterBB =
937 SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
938 BasicBlock *SeqStartBB =
939 SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
940
941 assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&(static_cast <bool> (ParentBB->getUniqueSuccessor() ==
SeqStartBB && "Expected a different CFG") ? void (0)
: __assert_fail ("ParentBB->getUniqueSuccessor() == SeqStartBB && \"Expected a different CFG\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 942, __extension__ __PRETTY_FUNCTION__))
942 "Expected a different CFG")(static_cast <bool> (ParentBB->getUniqueSuccessor() ==
SeqStartBB && "Expected a different CFG") ? void (0)
: __assert_fail ("ParentBB->getUniqueSuccessor() == SeqStartBB && \"Expected a different CFG\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 942, __extension__ __PRETTY_FUNCTION__))
;
943 const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
944 ParentBB->getTerminator()->eraseFromParent();
945
946 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
947 BasicBlock &ContinuationIP) {
948 BasicBlock *CGStartBB = CodeGenIP.getBlock();
949 BasicBlock *CGEndBB =
950 SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
951 assert(SeqStartBB != nullptr && "SeqStartBB should not be null")(static_cast <bool> (SeqStartBB != nullptr && "SeqStartBB should not be null"
) ? void (0) : __assert_fail ("SeqStartBB != nullptr && \"SeqStartBB should not be null\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 951, __extension__ __PRETTY_FUNCTION__))
;
952 CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
953 assert(SeqEndBB != nullptr && "SeqEndBB should not be null")(static_cast <bool> (SeqEndBB != nullptr && "SeqEndBB should not be null"
) ? void (0) : __assert_fail ("SeqEndBB != nullptr && \"SeqEndBB should not be null\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 953, __extension__ __PRETTY_FUNCTION__))
;
954 SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
955 };
956 auto FiniCB = [&](InsertPointTy CodeGenIP) {};
957
958 // Find outputs from the sequential region to outside users and
959 // broadcast their values to them.
960 for (Instruction &I : *SeqStartBB) {
961 SmallPtrSet<Instruction *, 4> OutsideUsers;
962 for (User *Usr : I.users()) {
963 Instruction &UsrI = *cast<Instruction>(Usr);
964 // Ignore outputs to LT intrinsics, code extraction for the merged
965 // parallel region will fix them.
966 if (UsrI.isLifetimeStartOrEnd())
967 continue;
968
969 if (UsrI.getParent() != SeqStartBB)
970 OutsideUsers.insert(&UsrI);
971 }
972
973 if (OutsideUsers.empty())
974 continue;
975
976 // Emit an alloca in the outer region to store the broadcasted
977 // value.
978 const DataLayout &DL = M.getDataLayout();
979 AllocaInst *AllocaI = new AllocaInst(
980 I.getType(), DL.getAllocaAddrSpace(), nullptr,
981 I.getName() + ".seq.output.alloc", &OuterFn->front().front());
982
983 // Emit a store instruction in the sequential BB to update the
984 // value.
985 new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
986
987 // Emit a load instruction and replace the use of the output value
988 // with it.
989 for (Instruction *UsrI : OutsideUsers) {
990 LoadInst *LoadI = new LoadInst(
991 I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
992 UsrI->replaceUsesOfWith(&I, LoadI);
993 }
994 }
995
996 OpenMPIRBuilder::LocationDescription Loc(
997 InsertPointTy(ParentBB, ParentBB->end()), DL);
998 InsertPointTy SeqAfterIP =
999 OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
1000
1001 OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
1002
1003 BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
1004
1005 LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFndo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "After sequential inlining "
<< *OuterFn << "\n"; } } while (false)
1006 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "After sequential inlining "
<< *OuterFn << "\n"; } } while (false)
;
1007 };
1008
1009 // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
1010 // contained in BB and only separated by instructions that can be
1011 // redundantly executed in parallel. The block BB is split before the first
1012 // call (in MergableCIs) and after the last so the entire region we merge
1013 // into a single parallel region is contained in a single basic block
1014 // without any other instructions. We use the OpenMPIRBuilder to outline
1015 // that block and call the resulting function via __kmpc_fork_call.
1016 auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
1017 // TODO: Change the interface to allow single CIs expanded, e.g, to
1018 // include an outer loop.
1019 assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs")(static_cast <bool> (MergableCIs.size() > 1 &&
"Assumed multiple mergable CIs") ? void (0) : __assert_fail (
"MergableCIs.size() > 1 && \"Assumed multiple mergable CIs\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1019, __extension__ __PRETTY_FUNCTION__))
;
1020
1021 auto Remark = [&](OptimizationRemark OR) {
1022 OR << "Parallel region merged with parallel region"
1023 << (MergableCIs.size() > 2 ? "s" : "") << " at ";
1024 for (auto *CI : llvm::drop_begin(MergableCIs)) {
1025 OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
1026 if (CI != MergableCIs.back())
1027 OR << ", ";
1028 }
1029 return OR << ".";
1030 };
1031
1032 emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);
1033
1034 Function *OriginalFn = BB->getParent();
1035 LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Merge " <<
MergableCIs.size() << " parallel regions in " <<
OriginalFn->getName() << "\n"; } } while (false)
1036 << " parallel regions in " << OriginalFn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Merge " <<
MergableCIs.size() << " parallel regions in " <<
OriginalFn->getName() << "\n"; } } while (false)
1037 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Merge " <<
MergableCIs.size() << " parallel regions in " <<
OriginalFn->getName() << "\n"; } } while (false)
;
1038
1039 // Isolate the calls to merge in a separate block.
1040 EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
1041 BasicBlock *AfterBB =
1042 SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
1043 StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
1044 "omp.par.merged");
1045
1046 assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG")(static_cast <bool> (BB->getUniqueSuccessor() == StartBB
&& "Expected a different CFG") ? void (0) : __assert_fail
("BB->getUniqueSuccessor() == StartBB && \"Expected a different CFG\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1046, __extension__ __PRETTY_FUNCTION__))
;
1047 const DebugLoc DL = BB->getTerminator()->getDebugLoc();
1048 BB->getTerminator()->eraseFromParent();
1049
1050 // Create sequential regions for sequential instructions that are
1051 // in-between mergable parallel regions.
1052 for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
1053 It != End; ++It) {
1054 Instruction *ForkCI = *It;
1055 Instruction *NextForkCI = *(It + 1);
1056
1057 // Continue if there are not in-between instructions.
1058 if (ForkCI->getNextNode() == NextForkCI)
1059 continue;
1060
1061 CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
1062 NextForkCI->getPrevNode());
1063 }
1064
1065 OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
1066 DL);
1067 IRBuilder<>::InsertPoint AllocaIP(
1068 &OriginalFn->getEntryBlock(),
1069 OriginalFn->getEntryBlock().getFirstInsertionPt());
1070 // Create the merged parallel region with default proc binding, to
1071 // avoid overriding binding settings, and without explicit cancellation.
1072 InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
1073 Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
1074 OMP_PROC_BIND_default, /* IsCancellable */ false);
1075 BranchInst::Create(AfterBB, AfterIP.getBlock());
1076
1077 // Perform the actual outlining.
1078 OMPInfoCache.OMPBuilder.finalize(OriginalFn,
1079 /* AllowExtractorSinking */ true);
1080
1081 Function *OutlinedFn = MergableCIs.front()->getCaller();
1082
1083 // Replace the __kmpc_fork_call calls with direct calls to the outlined
1084 // callbacks.
1085 SmallVector<Value *, 8> Args;
1086 for (auto *CI : MergableCIs) {
1087 Value *Callee =
1088 CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
1089 FunctionType *FT =
1090 cast<FunctionType>(Callee->getType()->getPointerElementType());
1091 Args.clear();
1092 Args.push_back(OutlinedFn->getArg(0));
1093 Args.push_back(OutlinedFn->getArg(1));
1094 for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
1095 ++U)
1096 Args.push_back(CI->getArgOperand(U));
1097
1098 CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
1099 if (CI->getDebugLoc())
1100 NewCI->setDebugLoc(CI->getDebugLoc());
1101
1102 // Forward parameter attributes from the callback to the callee.
1103 for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
1104 ++U)
1105 for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
1106 NewCI->addParamAttr(
1107 U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
1108
1109 // Emit an explicit barrier to replace the implicit fork-join barrier.
1110 if (CI != MergableCIs.back()) {
1111 // TODO: Remove barrier if the merged parallel region includes the
1112 // 'nowait' clause.
1113 OMPInfoCache.OMPBuilder.createBarrier(
1114 InsertPointTy(NewCI->getParent(),
1115 NewCI->getNextNode()->getIterator()),
1116 OMPD_parallel);
1117 }
1118
1119 CI->eraseFromParent();
1120 }
1121
1122 assert(OutlinedFn != OriginalFn && "Outlining failed")(static_cast <bool> (OutlinedFn != OriginalFn &&
"Outlining failed") ? void (0) : __assert_fail ("OutlinedFn != OriginalFn && \"Outlining failed\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1122, __extension__ __PRETTY_FUNCTION__))
;
1123 CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
1124 CGUpdater.reanalyzeFunction(*OriginalFn);
1125
1126 NumOpenMPParallelRegionsMerged += MergableCIs.size();
1127
1128 return true;
1129 };
1130
1131 // Helper function that identifes sequences of
1132 // __kmpc_fork_call uses in a basic block.
1133 auto DetectPRsCB = [&](Use &U, Function &F) {
1134 CallInst *CI = getCallIfRegularCall(U, &RFI);
1135 BB2PRMap[CI->getParent()].insert(CI);
1136
1137 return false;
1138 };
1139
1140 BB2PRMap.clear();
1141 RFI.foreachUse(SCC, DetectPRsCB);
1142 SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
1143 // Find mergable parallel regions within a basic block that are
1144 // safe to merge, that is any in-between instructions can safely
1145 // execute in parallel after merging.
1146 // TODO: support merging across basic-blocks.
1147 for (auto &It : BB2PRMap) {
1148 auto &CIs = It.getSecond();
1149 if (CIs.size() < 2)
1150 continue;
1151
1152 BasicBlock *BB = It.getFirst();
1153 SmallVector<CallInst *, 4> MergableCIs;
1154
1155 /// Returns true if the instruction is mergable, false otherwise.
1156 /// A terminator instruction is unmergable by definition since merging
1157 /// works within a BB. Instructions before the mergable region are
1158 /// mergable if they are not calls to OpenMP runtime functions that may
1159 /// set different execution parameters for subsequent parallel regions.
1160 /// Instructions in-between parallel regions are mergable if they are not
1161 /// calls to any non-intrinsic function since that may call a non-mergable
1162 /// OpenMP runtime function.
1163 auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
1164 // We do not merge across BBs, hence return false (unmergable) if the
1165 // instruction is a terminator.
1166 if (I.isTerminator())
1167 return false;
1168
1169 if (!isa<CallInst>(&I))
1170 return true;
1171
1172 CallInst *CI = cast<CallInst>(&I);
1173 if (IsBeforeMergableRegion) {
1174 Function *CalledFunction = CI->getCalledFunction();
1175 if (!CalledFunction)
1176 return false;
1177 // Return false (unmergable) if the call before the parallel
1178 // region calls an explicit affinity (proc_bind) or number of
1179 // threads (num_threads) compiler-generated function. Those settings
1180 // may be incompatible with following parallel regions.
1181 // TODO: ICV tracking to detect compatibility.
1182 for (const auto &RFI : UnmergableCallsInfo) {
1183 if (CalledFunction == RFI.Declaration)
1184 return false;
1185 }
1186 } else {
1187 // Return false (unmergable) if there is a call instruction
1188 // in-between parallel regions when it is not an intrinsic. It
1189 // may call an unmergable OpenMP runtime function in its callpath.
1190 // TODO: Keep track of possible OpenMP calls in the callpath.
1191 if (!isa<IntrinsicInst>(CI))
1192 return false;
1193 }
1194
1195 return true;
1196 };
1197 // Find maximal number of parallel region CIs that are safe to merge.
1198 for (auto It = BB->begin(), End = BB->end(); It != End;) {
1199 Instruction &I = *It;
1200 ++It;
1201
1202 if (CIs.count(&I)) {
1203 MergableCIs.push_back(cast<CallInst>(&I));
1204 continue;
1205 }
1206
1207 // Continue expanding if the instruction is mergable.
1208 if (IsMergable(I, MergableCIs.empty()))
1209 continue;
1210
1211 // Forward the instruction iterator to skip the next parallel region
1212 // since there is an unmergable instruction which can affect it.
1213 for (; It != End; ++It) {
1214 Instruction &SkipI = *It;
1215 if (CIs.count(&SkipI)) {
1216 LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Skip parallel region "
<< SkipI << " due to " << I << "\n";
} } while (false)
1217 << " due to " << I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Skip parallel region "
<< SkipI << " due to " << I << "\n";
} } while (false)
;
1218 ++It;
1219 break;
1220 }
1221 }
1222
1223 // Store mergable regions found.
1224 if (MergableCIs.size() > 1) {
1225 MergableCIsVector.push_back(MergableCIs);
1226 LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Found " <<
MergableCIs.size() << " parallel regions in block " <<
BB->getName() << " of function " << BB->getParent
()->getName() << "\n";; } } while (false)
1227 << " parallel regions in block " << BB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Found " <<
MergableCIs.size() << " parallel regions in block " <<
BB->getName() << " of function " << BB->getParent
()->getName() << "\n";; } } while (false)
1228 << " of function " << BB->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Found " <<
MergableCIs.size() << " parallel regions in block " <<
BB->getName() << " of function " << BB->getParent
()->getName() << "\n";; } } while (false)
1229 << "\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Found " <<
MergableCIs.size() << " parallel regions in block " <<
BB->getName() << " of function " << BB->getParent
()->getName() << "\n";; } } while (false)
;
1230 }
1231
1232 MergableCIs.clear();
1233 }
1234
1235 if (!MergableCIsVector.empty()) {
1236 Changed = true;
1237
1238 for (auto &MergableCIs : MergableCIsVector)
1239 Merge(MergableCIs, BB);
1240 MergableCIsVector.clear();
1241 }
1242 }
1243
1244 if (Changed) {
1245 /// Re-collect use for fork calls, emitted barrier calls, and
1246 /// any emitted master/end_master calls.
1247 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
1248 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
1249 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
1250 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
1251 }
1252
1253 return Changed;
1254 }
1255
1256 /// Try to delete parallel regions if possible.
1257 bool deleteParallelRegions() {
1258 const unsigned CallbackCalleeOperand = 2;
1259
1260 OMPInformationCache::RuntimeFunctionInfo &RFI =
1261 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
1262
1263 if (!RFI.Declaration)
1264 return false;
1265
1266 bool Changed = false;
1267 auto DeleteCallCB = [&](Use &U, Function &) {
1268 CallInst *CI = getCallIfRegularCall(U);
1269 if (!CI)
1270 return false;
1271 auto *Fn = dyn_cast<Function>(
1272 CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1273 if (!Fn)
1274 return false;
1275 if (!Fn->onlyReadsMemory())
1276 return false;
1277 if (!Fn->hasFnAttribute(Attribute::WillReturn))
1278 return false;
1279
1280 LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Delete read-only parallel region in "
<< CI->getCaller()->getName() << "\n"; } }
while (false)
1281 << CI->getCaller()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Delete read-only parallel region in "
<< CI->getCaller()->getName() << "\n"; } }
while (false)
;
1282
1283 auto Remark = [&](OptimizationRemark OR) {
1284 return OR << "Removing parallel region with no side-effects.";
1285 };
1286 emitRemark<OptimizationRemark>(CI, "OMP160", Remark);
1287
1288 CGUpdater.removeCallSite(*CI);
1289 CI->eraseFromParent();
1290 Changed = true;
1291 ++NumOpenMPParallelRegionsDeleted;
1292 return true;
1293 };
1294
1295 RFI.foreachUse(SCC, DeleteCallCB);
1296
1297 return Changed;
1298 }
1299
1300 /// Try to eliminate runtime calls by reusing existing ones.
1301 bool deduplicateRuntimeCalls() {
1302 bool Changed = false;
1303
1304 RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1305 OMPRTL_omp_get_num_threads,
1306 OMPRTL_omp_in_parallel,
1307 OMPRTL_omp_get_cancellation,
1308 OMPRTL_omp_get_thread_limit,
1309 OMPRTL_omp_get_supported_active_levels,
1310 OMPRTL_omp_get_level,
1311 OMPRTL_omp_get_ancestor_thread_num,
1312 OMPRTL_omp_get_team_size,
1313 OMPRTL_omp_get_active_level,
1314 OMPRTL_omp_in_final,
1315 OMPRTL_omp_get_proc_bind,
1316 OMPRTL_omp_get_num_places,
1317 OMPRTL_omp_get_num_procs,
1318 OMPRTL_omp_get_place_num,
1319 OMPRTL_omp_get_partition_num_places,
1320 OMPRTL_omp_get_partition_place_nums};
1321
1322 // Global-tid is handled separately.
1323 SmallSetVector<Value *, 16> GTIdArgs;
1324 collectGlobalThreadIdArguments(GTIdArgs);
1325 LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Found " <<
GTIdArgs.size() << " global thread ID arguments\n"; } }
while (false)
1326 << " global thread ID arguments\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Found " <<
GTIdArgs.size() << " global thread ID arguments\n"; } }
while (false)
;
1327
1328 for (Function *F : SCC) {
1329 for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
1330 Changed |= deduplicateRuntimeCalls(
1331 *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1332
1333 // __kmpc_global_thread_num is special as we can replace it with an
1334 // argument in enough cases to make it worth trying.
1335 Value *GTIdArg = nullptr;
1336 for (Argument &Arg : F->args())
1337 if (GTIdArgs.count(&Arg)) {
1338 GTIdArg = &Arg;
1339 break;
1340 }
1341 Changed |= deduplicateRuntimeCalls(
1342 *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
1343 }
1344
1345 return Changed;
1346 }
1347
1348 /// Tries to hide the latency of runtime calls that involve host to
1349 /// device memory transfers by splitting them into their "issue" and "wait"
1350 /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1351 /// moved downards as much as possible. The "issue" issues the memory transfer
1352 /// asynchronously, returning a handle. The "wait" waits in the returned
1353 /// handle for the memory transfer to finish.
1354 bool hideMemTransfersLatency() {
1355 auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1356 bool Changed = false;
1357 auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1358 auto *RTCall = getCallIfRegularCall(U, &RFI);
1359 if (!RTCall)
1360 return false;
1361
1362 OffloadArray OffloadArrays[3];
1363 if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
1364 return false;
1365
1366 LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dumpValuesInOffloadArrays(OffloadArrays); }
} while (false)
;
1367
1368 // TODO: Check if can be moved upwards.
1369 bool WasSplit = false;
1370 Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1371 if (WaitMovementPoint)
1372 WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1373
1374 Changed |= WasSplit;
1375 return WasSplit;
1376 };
1377 RFI.foreachUse(SCC, SplitMemTransfers);
1378
1379 return Changed;
1380 }
1381
1382 void analysisGlobalization() {
1383 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
1384
1385 auto CheckGlobalization = [&](Use &U, Function &Decl) {
1386 if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
1387 auto Remark = [&](OptimizationRemarkMissed ORM) {
1388 return ORM
1389 << "Found thread data sharing on the GPU. "
1390 << "Expect degraded performance due to data globalization.";
1391 };
1392 emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
1393 }
1394
1395 return false;
1396 };
1397
1398 RFI.foreachUse(SCC, CheckGlobalization);
1399 }
1400
1401 /// Maps the values stored in the offload arrays passed as arguments to
1402 /// \p RuntimeCall into the offload arrays in \p OAs.
1403 bool getValuesInOffloadArrays(CallInst &RuntimeCall,
1404 MutableArrayRef<OffloadArray> OAs) {
1405 assert(OAs.size() == 3 && "Need space for three offload arrays!")(static_cast <bool> (OAs.size() == 3 && "Need space for three offload arrays!"
) ? void (0) : __assert_fail ("OAs.size() == 3 && \"Need space for three offload arrays!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1405, __extension__ __PRETTY_FUNCTION__))
;
1406
1407 // A runtime call that involves memory offloading looks something like:
1408 // call void @__tgt_target_data_begin_mapper(arg0, arg1,
1409 // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
1410 // ...)
1411 // So, the idea is to access the allocas that allocate space for these
1412 // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
1413 // Therefore:
1414 // i8** %offload_baseptrs.
1415 Value *BasePtrsArg =
1416 RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
1417 // i8** %offload_ptrs.
1418 Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
1419 // i8** %offload_sizes.
1420 Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
1421
1422 // Get values stored in **offload_baseptrs.
1423 auto *V = getUnderlyingObject(BasePtrsArg);
1424 if (!isa<AllocaInst>(V))
1425 return false;
1426 auto *BasePtrsArray = cast<AllocaInst>(V);
1427 if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
1428 return false;
1429
1430 // Get values stored in **offload_baseptrs.
1431 V = getUnderlyingObject(PtrsArg);
1432 if (!isa<AllocaInst>(V))
1433 return false;
1434 auto *PtrsArray = cast<AllocaInst>(V);
1435 if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
1436 return false;
1437
1438 // Get values stored in **offload_sizes.
1439 V = getUnderlyingObject(SizesArg);
1440 // If it's a [constant] global array don't analyze it.
1441 if (isa<GlobalValue>(V))
1442 return isa<Constant>(V);
1443 if (!isa<AllocaInst>(V))
1444 return false;
1445
1446 auto *SizesArray = cast<AllocaInst>(V);
1447 if (!OAs[2].initialize(*SizesArray, RuntimeCall))
1448 return false;
1449
1450 return true;
1451 }
1452
1453 /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
1454 /// For now this is a way to test that the function getValuesInOffloadArrays
1455 /// is working properly.
1456 /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
1457 void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
1458 assert(OAs.size() == 3 && "There are three offload arrays to debug!")(static_cast <bool> (OAs.size() == 3 && "There are three offload arrays to debug!"
) ? void (0) : __assert_fail ("OAs.size() == 3 && \"There are three offload arrays to debug!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1458, __extension__ __PRETTY_FUNCTION__))
;
1459
1460 LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << " Successfully got offload values:\n"
; } } while (false)
;
1461 std::string ValuesStr;
1462 raw_string_ostream Printer(ValuesStr);
1463 std::string Separator = " --- ";
1464
1465 for (auto *BP : OAs[0].StoredValues) {
1466 BP->print(Printer);
1467 Printer << Separator;
1468 }
1469 LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << "\t\toffload_baseptrs: " <<
Printer.str() << "\n"; } } while (false)
;
1470 ValuesStr.clear();
1471
1472 for (auto *P : OAs[1].StoredValues) {
1473 P->print(Printer);
1474 Printer << Separator;
1475 }
1476 LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << "\t\toffload_ptrs: " <<
Printer.str() << "\n"; } } while (false)
;
1477 ValuesStr.clear();
1478
1479 for (auto *S : OAs[2].StoredValues) {
1480 S->print(Printer);
1481 Printer << Separator;
1482 }
1483 LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << "\t\toffload_sizes: " <<
Printer.str() << "\n"; } } while (false)
;
1484 }
1485
1486 /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1487 /// moved. Returns nullptr if the movement is not possible, or not worth it.
1488 Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1489 // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1490 // Make it traverse the CFG.
1491
1492 Instruction *CurrentI = &RuntimeCall;
1493 bool IsWorthIt = false;
1494 while ((CurrentI = CurrentI->getNextNode())) {
1495
1496 // TODO: Once we detect the regions to be offloaded we should use the
1497 // alias analysis manager to check if CurrentI may modify one of
1498 // the offloaded regions.
1499 if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1500 if (IsWorthIt)
1501 return CurrentI;
1502
1503 return nullptr;
1504 }
1505
1506 // FIXME: For now if we move it over anything without side effect
1507 // is worth it.
1508 IsWorthIt = true;
1509 }
1510
1511 // Return end of BasicBlock.
1512 return RuntimeCall.getParent()->getTerminator();
1513 }
1514
1515 /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
1516 bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1517 Instruction &WaitMovementPoint) {
1518 // Create stack allocated handle (__tgt_async_info) at the beginning of the
1519 // function. Used for storing information of the async transfer, allowing to
1520 // wait on it later.
1521 auto &IRBuilder = OMPInfoCache.OMPBuilder;
1522 auto *F = RuntimeCall.getCaller();
1523 Instruction *FirstInst = &(F->getEntryBlock().front());
1524 AllocaInst *Handle = new AllocaInst(
1525 IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
1526
1527 // Add "issue" runtime call declaration:
1528 // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1529 // i8**, i8**, i64*, i64*)
1530 FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1531 M, OMPRTL___tgt_target_data_begin_mapper_issue);
1532
1533 // Change RuntimeCall call site for its asynchronous version.
1534 SmallVector<Value *, 16> Args;
1535 for (auto &Arg : RuntimeCall.args())
1536 Args.push_back(Arg.get());
1537 Args.push_back(Handle);
1538
1539 CallInst *IssueCallsite =
1540 CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
1541 RuntimeCall.eraseFromParent();
1542
1543 // Add "wait" runtime call declaration:
1544 // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1545 FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1546 M, OMPRTL___tgt_target_data_begin_mapper_wait);
1547
1548 Value *WaitParams[2] = {
1549 IssueCallsite->getArgOperand(
1550 OffloadArray::DeviceIDArgNum), // device_id.
1551 Handle // handle to wait on.
1552 };
1553 CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
1554
1555 return true;
1556 }
1557
1558 static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1559 bool GlobalOnly, bool &SingleChoice) {
1560 if (CurrentIdent == NextIdent)
1561 return CurrentIdent;
1562
1563 // TODO: Figure out how to actually combine multiple debug locations. For
1564 // now we just keep an existing one if there is a single choice.
1565 if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1566 SingleChoice = !CurrentIdent;
1567 return NextIdent;
1568 }
1569 return nullptr;
1570 }
1571
1572 /// Return an `struct ident_t*` value that represents the ones used in the
1573 /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1574 /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1575 /// return value we create one from scratch. We also do not yet combine
1576 /// information, e.g., the source locations, see combinedIdentStruct.
1577 Value *
1578 getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
1579 Function &F, bool GlobalOnly) {
1580 bool SingleChoice = true;
1581 Value *Ident = nullptr;
1582 auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1583 CallInst *CI = getCallIfRegularCall(U, &RFI);
1584 if (!CI || &F != &Caller)
1585 return false;
1586 Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1587 /* GlobalOnly */ true, SingleChoice);
1588 return false;
1589 };
1590 RFI.foreachUse(SCC, CombineIdentStruct);
1591
1592 if (!Ident || !SingleChoice) {
1593 // The IRBuilder uses the insertion block to get to the module, this is
1594 // unfortunate but we work around it for now.
1595 if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
1596 OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1597 &F.getEntryBlock(), F.getEntryBlock().begin()));
1598 // Create a fallback location if non was found.
1599 // TODO: Use the debug locations of the calls instead.
1600 Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
1601 Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
1602 }
1603 return Ident;
1604 }
1605
1606 /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
1607 /// \p ReplVal if given.
1608 bool deduplicateRuntimeCalls(Function &F,
1609 OMPInformationCache::RuntimeFunctionInfo &RFI,
1610 Value *ReplVal = nullptr) {
1611 auto *UV = RFI.getUseVector(F);
1612 if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1613 return false;
1614
1615 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Deduplicate "
<< UV->size() << " uses of " << RFI.Name
<< (ReplVal ? " with an existing value\n" : "\n") <<
"\n"; } } while (false)
1616 dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Namedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Deduplicate "
<< UV->size() << " uses of " << RFI.Name
<< (ReplVal ? " with an existing value\n" : "\n") <<
"\n"; } } while (false)
1617 << (ReplVal ? " with an existing value\n" : "\n") << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Deduplicate "
<< UV->size() << " uses of " << RFI.Name
<< (ReplVal ? " with an existing value\n" : "\n") <<
"\n"; } } while (false)
;
1618
1619 assert((!ReplVal || (isa<Argument>(ReplVal) &&(static_cast <bool> ((!ReplVal || (isa<Argument>(
ReplVal) && cast<Argument>(ReplVal)->getParent
() == &F)) && "Unexpected replacement value!") ? void
(0) : __assert_fail ("(!ReplVal || (isa<Argument>(ReplVal) && cast<Argument>(ReplVal)->getParent() == &F)) && \"Unexpected replacement value!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1621, __extension__ __PRETTY_FUNCTION__))
1620 cast<Argument>(ReplVal)->getParent() == &F)) &&(static_cast <bool> ((!ReplVal || (isa<Argument>(
ReplVal) && cast<Argument>(ReplVal)->getParent
() == &F)) && "Unexpected replacement value!") ? void
(0) : __assert_fail ("(!ReplVal || (isa<Argument>(ReplVal) && cast<Argument>(ReplVal)->getParent() == &F)) && \"Unexpected replacement value!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1621, __extension__ __PRETTY_FUNCTION__))
1621 "Unexpected replacement value!")(static_cast <bool> ((!ReplVal || (isa<Argument>(
ReplVal) && cast<Argument>(ReplVal)->getParent
() == &F)) && "Unexpected replacement value!") ? void
(0) : __assert_fail ("(!ReplVal || (isa<Argument>(ReplVal) && cast<Argument>(ReplVal)->getParent() == &F)) && \"Unexpected replacement value!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1621, __extension__ __PRETTY_FUNCTION__))
;
1622
1623 // TODO: Use dominance to find a good position instead.
1624 auto CanBeMoved = [this](CallBase &CB) {
1625 unsigned NumArgs = CB.arg_size();
1626 if (NumArgs == 0)
1627 return true;
1628 if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1629 return false;
1630 for (unsigned U = 1; U < NumArgs; ++U)
1631 if (isa<Instruction>(CB.getArgOperand(U)))
1632 return false;
1633 return true;
1634 };
1635
1636 if (!ReplVal) {
1637 for (Use *U : *UV)
1638 if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1639 if (!CanBeMoved(*CI))
1640 continue;
1641
1642 // If the function is a kernel, dedup will move
1643 // the runtime call right after the kernel init callsite. Otherwise,
1644 // it will move it to the beginning of the caller function.
1645 if (isKernel(F)) {
1646 auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
1647 auto *KernelInitUV = KernelInitRFI.getUseVector(F);
1648
1649 if (KernelInitUV->empty())
1650 continue;
1651
1652 assert(KernelInitUV->size() == 1 &&(static_cast <bool> (KernelInitUV->size() == 1 &&
"Expected a single __kmpc_target_init in kernel\n") ? void (
0) : __assert_fail ("KernelInitUV->size() == 1 && \"Expected a single __kmpc_target_init in kernel\\n\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1653, __extension__ __PRETTY_FUNCTION__))
1653 "Expected a single __kmpc_target_init in kernel\n")(static_cast <bool> (KernelInitUV->size() == 1 &&
"Expected a single __kmpc_target_init in kernel\n") ? void (
0) : __assert_fail ("KernelInitUV->size() == 1 && \"Expected a single __kmpc_target_init in kernel\\n\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1653, __extension__ __PRETTY_FUNCTION__))
;
1654
1655 CallInst *KernelInitCI =
1656 getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
1657 assert(KernelInitCI &&(static_cast <bool> (KernelInitCI && "Expected a call to __kmpc_target_init in kernel\n"
) ? void (0) : __assert_fail ("KernelInitCI && \"Expected a call to __kmpc_target_init in kernel\\n\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1658, __extension__ __PRETTY_FUNCTION__))
1658 "Expected a call to __kmpc_target_init in kernel\n")(static_cast <bool> (KernelInitCI && "Expected a call to __kmpc_target_init in kernel\n"
) ? void (0) : __assert_fail ("KernelInitCI && \"Expected a call to __kmpc_target_init in kernel\\n\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1658, __extension__ __PRETTY_FUNCTION__))
;
1659
1660 CI->moveAfter(KernelInitCI);
1661 } else
1662 CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
1663 ReplVal = CI;
1664 break;
1665 }
1666 if (!ReplVal)
1667 return false;
1668 }
1669
1670 // If we use a call as a replacement value we need to make sure the ident is
1671 // valid at the new location. For now we just pick a global one, either
1672 // existing and used by one of the calls, or created from scratch.
1673 if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
1674 if (!CI->arg_empty() &&
1675 CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1676 Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1677 /* GlobalOnly */ true);
1678 CI->setArgOperand(0, Ident);
1679 }
1680 }
1681
1682 bool Changed = false;
1683 auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
1684 CallInst *CI = getCallIfRegularCall(U, &RFI);
1685 if (!CI || CI == ReplVal || &F != &Caller)
1686 return false;
1687 assert(CI->getCaller() == &F && "Unexpected call!")(static_cast <bool> (CI->getCaller() == &F &&
"Unexpected call!") ? void (0) : __assert_fail ("CI->getCaller() == &F && \"Unexpected call!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 1687, __extension__ __PRETTY_FUNCTION__))
;
1688
1689 auto Remark = [&](OptimizationRemark OR) {
1690 return OR << "OpenMP runtime call "
1691 << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
1692 };
1693 if (CI->getDebugLoc())
1694 emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
1695 else
1696 emitRemark<OptimizationRemark>(&F, "OMP170", Remark);
1697
1698 CGUpdater.removeCallSite(*CI);
1699 CI->replaceAllUsesWith(ReplVal);
1700 CI->eraseFromParent();
1701 ++NumOpenMPRuntimeCallsDeduplicated;
1702 Changed = true;
1703 return true;
1704 };
1705 RFI.foreachUse(SCC, ReplaceAndDeleteCB);
1706
1707 return Changed;
1708 }
1709
1710 /// Collect arguments that represent the global thread id in \p GTIdArgs.
1711 void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
1712 // TODO: Below we basically perform a fixpoint iteration with a pessimistic
1713 // initialization. We could define an AbstractAttribute instead and
1714 // run the Attributor here once it can be run as an SCC pass.
1715
1716 // Helper to check the argument \p ArgNo at all call sites of \p F for
1717 // a GTId.
1718 auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
1719 if (!F.hasLocalLinkage())
1720 return false;
1721 for (Use &U : F.uses()) {
1722 if (CallInst *CI = getCallIfRegularCall(U)) {
1723 Value *ArgOp = CI->getArgOperand(ArgNo);
1724 if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
1725 getCallIfRegularCall(
1726 *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
1727 continue;
1728 }
1729 return false;
1730 }
1731 return true;
1732 };
1733
1734 // Helper to identify uses of a GTId as GTId arguments.
1735 auto AddUserArgs = [&](Value &GTId) {
1736 for (Use &U : GTId.uses())
1737 if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
1738 if (CI->isArgOperand(&U))
1739 if (Function *Callee = CI->getCalledFunction())
1740 if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
1741 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
1742 };
1743
1744 // The argument users of __kmpc_global_thread_num calls are GTIds.
1745 OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
1746 OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
1747
1748 GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
1749 if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
1750 AddUserArgs(*CI);
1751 return false;
1752 });
1753
1754 // Transitively search for more arguments by looking at the users of the
1755 // ones we know already. During the search the GTIdArgs vector is extended
1756 // so we cannot cache the size nor can we use a range based for.
1757 for (unsigned U = 0; U < GTIdArgs.size(); ++U)
1758 AddUserArgs(*GTIdArgs[U]);
1759 }
1760
1761 /// Kernel (=GPU) optimizations and utility functions
1762 ///
1763 ///{{
1764
1765 /// Check if \p F is a kernel, hence entry point for target offloading.
1766 bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
1767
1768 /// Cache to remember the unique kernel for a function.
1769 DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
1770
1771 /// Find the unique kernel that will execute \p F, if any.
1772 Kernel getUniqueKernelFor(Function &F);
1773
1774 /// Find the unique kernel that will execute \p I, if any.
1775 Kernel getUniqueKernelFor(Instruction &I) {
1776 return getUniqueKernelFor(*I.getFunction());
1777 }
1778
1779 /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in
1780 /// the cases we can avoid taking the address of a function.
1781 bool rewriteDeviceCodeStateMachine();
1782
1783 ///
1784 ///}}
1785
1786 /// Emit a remark generically
1787 ///
1788 /// This template function can be used to generically emit a remark. The
1789 /// RemarkKind should be one of the following:
1790 /// - OptimizationRemark to indicate a successful optimization attempt
1791 /// - OptimizationRemarkMissed to report a failed optimization attempt
1792 /// - OptimizationRemarkAnalysis to provide additional information about an
1793 /// optimization attempt
1794 ///
1795 /// The remark is built using a callback function provided by the caller that
1796 /// takes a RemarkKind as input and returns a RemarkKind.
1797 template <typename RemarkKind, typename RemarkCallBack>
1798 void emitRemark(Instruction *I, StringRef RemarkName,
1799 RemarkCallBack &&RemarkCB) const {
1800 Function *F = I->getParent()->getParent();
1801 auto &ORE = OREGetter(F);
1802
1803 if (RemarkName.startswith("OMP"))
1804 ORE.emit([&]() {
1805 return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I))
1806 << " [" << RemarkName << "]";
1807 });
1808 else
1809 ORE.emit(
1810 [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)); });
1811 }
1812
1813 /// Emit a remark on a function.
1814 template <typename RemarkKind, typename RemarkCallBack>
1815 void emitRemark(Function *F, StringRef RemarkName,
1816 RemarkCallBack &&RemarkCB) const {
1817 auto &ORE = OREGetter(F);
1818
1819 if (RemarkName.startswith("OMP"))
1820 ORE.emit([&]() {
1821 return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F))
1822 << " [" << RemarkName << "]";
1823 });
1824 else
1825 ORE.emit(
1826 [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)); });
1827 }
1828
1829 /// RAII struct to temporarily change an RTL function's linkage to external.
1830 /// This prevents it from being mistakenly removed by other optimizations.
1831 struct ExternalizationRAII {
1832 ExternalizationRAII(OMPInformationCache &OMPInfoCache,
1833 RuntimeFunction RFKind)
1834 : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) {
1835 if (!Declaration)
1836 return;
1837
1838 LinkageType = Declaration->getLinkage();
1839 Declaration->setLinkage(GlobalValue::ExternalLinkage);
1840 }
1841
1842 ~ExternalizationRAII() {
1843 if (!Declaration)
1844 return;
1845
1846 Declaration->setLinkage(LinkageType);
1847 }
1848
1849 Function *Declaration;
1850 GlobalValue::LinkageTypes LinkageType;
1851 };
1852
1853 /// The underlying module.
1854 Module &M;
1855
1856 /// The SCC we are operating on.
1857 SmallVectorImpl<Function *> &SCC;
1858
1859 /// Callback to update the call graph, the first argument is a removed call,
1860 /// the second an optional replacement call.
1861 CallGraphUpdater &CGUpdater;
1862
1863 /// Callback to get an OptimizationRemarkEmitter from a Function *
1864 OptimizationRemarkGetter OREGetter;
1865
1866 /// OpenMP-specific information cache. Also Used for Attributor runs.
1867 OMPInformationCache &OMPInfoCache;
1868
1869 /// Attributor instance.
1870 Attributor &A;
1871
1872 /// Helper function to run Attributor on SCC.
1873 bool runAttributor(bool IsModulePass) {
1874 if (SCC.empty())
1875 return false;
1876
1877 // Temporarily make these function have external linkage so the Attributor
1878 // doesn't remove them when we try to look them up later.
1879 ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel);
1880 ExternalizationRAII EndParallel(OMPInfoCache,
1881 OMPRTL___kmpc_kernel_end_parallel);
1882 ExternalizationRAII BarrierSPMD(OMPInfoCache,
1883 OMPRTL___kmpc_barrier_simple_spmd);
1884 ExternalizationRAII BarrierGeneric(OMPInfoCache,
1885 OMPRTL___kmpc_barrier_simple_generic);
1886 ExternalizationRAII ThreadId(OMPInfoCache,
1887 OMPRTL___kmpc_get_hardware_thread_id_in_block);
1888
1889 registerAAs(IsModulePass);
1890
1891 ChangeStatus Changed = A.run();
1892
1893 LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << "[Attributor] Done with " <<
SCC.size() << " functions, result: " << Changed <<
".\n"; } } while (false)
1894 << " functions, result: " << Changed << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << "[Attributor] Done with " <<
SCC.size() << " functions, result: " << Changed <<
".\n"; } } while (false)
;
1895
1896 return Changed == ChangeStatus::CHANGED;
1897 }
1898
1899 void registerFoldRuntimeCall(RuntimeFunction RF);
1900
1901 /// Populate the Attributor with abstract attribute opportunities in the
1902 /// function.
1903 void registerAAs(bool IsModulePass);
1904};
1905
1906Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
1907 if (!OMPInfoCache.ModuleSlice.count(&F))
1908 return nullptr;
1909
1910 // Use a scope to keep the lifetime of the CachedKernel short.
1911 {
1912 Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
1913 if (CachedKernel)
1914 return *CachedKernel;
1915
1916 // TODO: We should use an AA to create an (optimistic and callback
1917 // call-aware) call graph. For now we stick to simple patterns that
1918 // are less powerful, basically the worst fixpoint.
1919 if (isKernel(F)) {
1920 CachedKernel = Kernel(&F);
1921 return *CachedKernel;
1922 }
1923
1924 CachedKernel = nullptr;
1925 if (!F.hasLocalLinkage()) {
1926
1927 // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
1928 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
1929 return ORA << "Potentially unknown OpenMP target region caller.";
1930 };
1931 emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
1932
1933 return nullptr;
1934 }
1935 }
1936
1937 auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
1938 if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
1939 // Allow use in equality comparisons.
1940 if (Cmp->isEquality())
1941 return getUniqueKernelFor(*Cmp);
1942 return nullptr;
1943 }
1944 if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
1945 // Allow direct calls.
1946 if (CB->isCallee(&U))
1947 return getUniqueKernelFor(*CB);
1948
1949 OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1950 OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
1951 // Allow the use in __kmpc_parallel_51 calls.
1952 if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
1953 return getUniqueKernelFor(*CB);
1954 return nullptr;
1955 }
1956 // Disallow every other use.
1957 return nullptr;
1958 };
1959
1960 // TODO: In the future we want to track more than just a unique kernel.
1961 SmallPtrSet<Kernel, 2> PotentialKernels;
1962 OMPInformationCache::foreachUse(F, [&](const Use &U) {
1963 PotentialKernels.insert(GetUniqueKernelForUse(U));
1964 });
1965
1966 Kernel K = nullptr;
1967 if (PotentialKernels.size() == 1)
1968 K = *PotentialKernels.begin();
1969
1970 // Cache the result.
1971 UniqueKernelMap[&F] = K;
1972
1973 return K;
1974}
1975
1976bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
1977 OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1978 OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
1979
1980 bool Changed = false;
1981 if (!KernelParallelRFI)
1982 return Changed;
1983
1984 // If we have disabled state machine changes, exit
1985 if (DisableOpenMPOptStateMachineRewrite)
1986 return Changed;
1987
1988 for (Function *F : SCC) {
1989
1990 // Check if the function is a use in a __kmpc_parallel_51 call at
1991 // all.
1992 bool UnknownUse = false;
1993 bool KernelParallelUse = false;
1994 unsigned NumDirectCalls = 0;
1995
1996 SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
1997 OMPInformationCache::foreachUse(*F, [&](Use &U) {
1998 if (auto *CB = dyn_cast<CallBase>(U.getUser()))
1999 if (CB->isCallee(&U)) {
2000 ++NumDirectCalls;
2001 return;
2002 }
2003
2004 if (isa<ICmpInst>(U.getUser())) {
2005 ToBeReplacedStateMachineUses.push_back(&U);
2006 return;
2007 }
2008
2009 // Find wrapper functions that represent parallel kernels.
2010 CallInst *CI =
2011 OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
2012 const unsigned int WrapperFunctionArgNo = 6;
2013 if (!KernelParallelUse && CI &&
2014 CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
2015 KernelParallelUse = true;
2016 ToBeReplacedStateMachineUses.push_back(&U);
2017 return;
2018 }
2019 UnknownUse = true;
2020 });
2021
2022 // Do not emit a remark if we haven't seen a __kmpc_parallel_51
2023 // use.
2024 if (!KernelParallelUse)
2025 continue;
2026
2027 // If this ever hits, we should investigate.
2028 // TODO: Checking the number of uses is not a necessary restriction and
2029 // should be lifted.
2030 if (UnknownUse || NumDirectCalls != 1 ||
2031 ToBeReplacedStateMachineUses.size() > 2) {
2032 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2033 return ORA << "Parallel region is used in "
2034 << (UnknownUse ? "unknown" : "unexpected")
2035 << " ways. Will not attempt to rewrite the state machine.";
2036 };
2037 emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
2038 continue;
2039 }
2040
2041 // Even if we have __kmpc_parallel_51 calls, we (for now) give
2042 // up if the function is not called from a unique kernel.
2043 Kernel K = getUniqueKernelFor(*F);
2044 if (!K) {
2045 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2046 return ORA << "Parallel region is not called from a unique kernel. "
2047 "Will not attempt to rewrite the state machine.";
2048 };
2049 emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
2050 continue;
2051 }
2052
2053 // We now know F is a parallel body function called only from the kernel K.
2054 // We also identified the state machine uses in which we replace the
2055 // function pointer by a new global symbol for identification purposes. This
2056 // ensures only direct calls to the function are left.
2057
2058 Module &M = *F->getParent();
2059 Type *Int8Ty = Type::getInt8Ty(M.getContext());
2060
2061 auto *ID = new GlobalVariable(
2062 M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
2063 UndefValue::get(Int8Ty), F->getName() + ".ID");
2064
2065 for (Use *U : ToBeReplacedStateMachineUses)
2066 U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
2067 ID, U->get()->getType()));
2068
2069 ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
2070
2071 Changed = true;
2072 }
2073
2074 return Changed;
2075}
2076
2077/// Abstract Attribute for tracking ICV values.
2078struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
2079 using Base = StateWrapper<BooleanState, AbstractAttribute>;
2080 AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2081
2082 void initialize(Attributor &A) override {
2083 Function *F = getAnchorScope();
2084 if (!F || !A.isFunctionIPOAmendable(*F))
2085 indicatePessimisticFixpoint();
2086 }
2087
2088 /// Returns true if value is assumed to be tracked.
2089 bool isAssumedTracked() const { return getAssumed(); }
2090
2091 /// Returns true if value is known to be tracked.
2092 bool isKnownTracked() const { return getAssumed(); }
2093
2094 /// Create an abstract attribute biew for the position \p IRP.
2095 static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
2096
2097 /// Return the value with which \p I can be replaced for specific \p ICV.
2098 virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
2099 const Instruction *I,
2100 Attributor &A) const {
2101 return None;
2102 }
2103
2104 /// Return an assumed unique ICV value if a single candidate is found. If
2105 /// there cannot be one, return a nullptr. If it is not clear yet, return the
2106 /// Optional::NoneType.
2107 virtual Optional<Value *>
2108 getUniqueReplacementValue(InternalControlVar ICV) const = 0;
2109
2110 // Currently only nthreads is being tracked.
2111 // this array will only grow with time.
2112 InternalControlVar TrackableICVs[1] = {ICV_nthreads};
2113
2114 /// See AbstractAttribute::getName()
2115 const std::string getName() const override { return "AAICVTracker"; }
2116
2117 /// See AbstractAttribute::getIdAddr()
2118 const char *getIdAddr() const override { return &ID; }
2119
2120 /// This function should return true if the type of the \p AA is AAICVTracker
2121 static bool classof(const AbstractAttribute *AA) {
2122 return (AA->getIdAddr() == &ID);
2123 }
2124
2125 static const char ID;
2126};
2127
2128struct AAICVTrackerFunction : public AAICVTracker {
2129 AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
2130 : AAICVTracker(IRP, A) {}
2131
2132 // FIXME: come up with better string.
2133 const std::string getAsStr() const override { return "ICVTrackerFunction"; }
2134
2135 // FIXME: come up with some stats.
2136 void trackStatistics() const override {}
2137
2138 /// We don't manifest anything for this AA.
2139 ChangeStatus manifest(Attributor &A) override {
2140 return ChangeStatus::UNCHANGED;
2141 }
2142
2143 // Map of ICV to their values at specific program point.
2144 EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
2145 InternalControlVar::ICV___last>
2146 ICVReplacementValuesMap;
2147
2148 ChangeStatus updateImpl(Attributor &A) override {
2149 ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
2150
2151 Function *F = getAnchorScope();
2152
2153 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2154
2155 for (InternalControlVar ICV : TrackableICVs) {
2156 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2157
2158 auto &ValuesMap = ICVReplacementValuesMap[ICV];
2159 auto TrackValues = [&](Use &U, Function &) {
2160 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
2161 if (!CI)
2162 return false;
2163
2164 // FIXME: handle setters with more that 1 arguments.
2165 /// Track new value.
2166 if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
2167 HasChanged = ChangeStatus::CHANGED;
2168
2169 return false;
2170 };
2171
2172 auto CallCheck = [&](Instruction &I) {
2173 Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
2174 if (ReplVal.hasValue() &&
2175 ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
2176 HasChanged = ChangeStatus::CHANGED;
2177
2178 return true;
2179 };
2180
2181 // Track all changes of an ICV.
2182 SetterRFI.foreachUse(TrackValues, F);
2183
2184 bool UsedAssumedInformation = false;
2185 A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
2186 UsedAssumedInformation,
2187 /* CheckBBLivenessOnly */ true);
2188
2189 /// TODO: Figure out a way to avoid adding entry in
2190 /// ICVReplacementValuesMap
2191 Instruction *Entry = &F->getEntryBlock().front();
2192 if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
2193 ValuesMap.insert(std::make_pair(Entry, nullptr));
2194 }
2195
2196 return HasChanged;
2197 }
2198
2199 /// Hepler to check if \p I is a call and get the value for it if it is
2200 /// unique.
2201 Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
2202 InternalControlVar &ICV) const {
2203
2204 const auto *CB = dyn_cast<CallBase>(I);
2205 if (!CB || CB->hasFnAttr("no_openmp") ||
2206 CB->hasFnAttr("no_openmp_routines"))
2207 return None;
2208
2209 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2210 auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
2211 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2212 Function *CalledFunction = CB->getCalledFunction();
2213
2214 // Indirect call, assume ICV changes.
2215 if (CalledFunction == nullptr)
2216 return nullptr;
2217 if (CalledFunction == GetterRFI.Declaration)
2218 return None;
2219 if (CalledFunction == SetterRFI.Declaration) {
2220 if (ICVReplacementValuesMap[ICV].count(I))
2221 return ICVReplacementValuesMap[ICV].lookup(I);
2222
2223 return nullptr;
2224 }
2225
2226 // Since we don't know, assume it changes the ICV.
2227 if (CalledFunction->isDeclaration())
2228 return nullptr;
2229
2230 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2231 *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
2232
2233 if (ICVTrackingAA.isAssumedTracked())
2234 return ICVTrackingAA.getUniqueReplacementValue(ICV);
2235
2236 // If we don't know, assume it changes.
2237 return nullptr;
2238 }
2239
2240 // We don't check unique value for a function, so return None.
2241 Optional<Value *>
2242 getUniqueReplacementValue(InternalControlVar ICV) const override {
2243 return None;
2244 }
2245
2246 /// Return the value with which \p I can be replaced for specific \p ICV.
2247 Optional<Value *> getReplacementValue(InternalControlVar ICV,
2248 const Instruction *I,
2249 Attributor &A) const override {
2250 const auto &ValuesMap = ICVReplacementValuesMap[ICV];
2251 if (ValuesMap.count(I))
2252 return ValuesMap.lookup(I);
2253
2254 SmallVector<const Instruction *, 16> Worklist;
2255 SmallPtrSet<const Instruction *, 16> Visited;
2256 Worklist.push_back(I);
2257
2258 Optional<Value *> ReplVal;
2259
2260 while (!Worklist.empty()) {
2261 const Instruction *CurrInst = Worklist.pop_back_val();
2262 if (!Visited.insert(CurrInst).second)
2263 continue;
2264
2265 const BasicBlock *CurrBB = CurrInst->getParent();
2266
2267 // Go up and look for all potential setters/calls that might change the
2268 // ICV.
2269 while ((CurrInst = CurrInst->getPrevNode())) {
2270 if (ValuesMap.count(CurrInst)) {
2271 Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
2272 // Unknown value, track new.
2273 if (!ReplVal.hasValue()) {
2274 ReplVal = NewReplVal;
2275 break;
2276 }
2277
2278 // If we found a new value, we can't know the icv value anymore.
2279 if (NewReplVal.hasValue())
2280 if (ReplVal != NewReplVal)
2281 return nullptr;
2282
2283 break;
2284 }
2285
2286 Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
2287 if (!NewReplVal.hasValue())
2288 continue;
2289
2290 // Unknown value, track new.
2291 if (!ReplVal.hasValue()) {
2292 ReplVal = NewReplVal;
2293 break;
2294 }
2295
2296 // if (NewReplVal.hasValue())
2297 // We found a new value, we can't know the icv value anymore.
2298 if (ReplVal != NewReplVal)
2299 return nullptr;
2300 }
2301
2302 // If we are in the same BB and we have a value, we are done.
2303 if (CurrBB == I->getParent() && ReplVal.hasValue())
2304 return ReplVal;
2305
2306 // Go through all predecessors and add terminators for analysis.
2307 for (const BasicBlock *Pred : predecessors(CurrBB))
2308 if (const Instruction *Terminator = Pred->getTerminator())
2309 Worklist.push_back(Terminator);
2310 }
2311
2312 return ReplVal;
2313 }
2314};
2315
2316struct AAICVTrackerFunctionReturned : AAICVTracker {
2317 AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
2318 : AAICVTracker(IRP, A) {}
2319
2320 // FIXME: come up with better string.
2321 const std::string getAsStr() const override {
2322 return "ICVTrackerFunctionReturned";
2323 }
2324
2325 // FIXME: come up with some stats.
2326 void trackStatistics() const override {}
2327
2328 /// We don't manifest anything for this AA.
2329 ChangeStatus manifest(Attributor &A) override {
2330 return ChangeStatus::UNCHANGED;
2331 }
2332
2333 // Map of ICV to their values at specific program point.
2334 EnumeratedArray<Optional<Value *>, InternalControlVar,
2335 InternalControlVar::ICV___last>
2336 ICVReplacementValuesMap;
2337
2338 /// Return the value with which \p I can be replaced for specific \p ICV.
2339 Optional<Value *>
2340 getUniqueReplacementValue(InternalControlVar ICV) const override {
2341 return ICVReplacementValuesMap[ICV];
2342 }
2343
2344 ChangeStatus updateImpl(Attributor &A) override {
2345 ChangeStatus Changed = ChangeStatus::UNCHANGED;
2346 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2347 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2348
2349 if (!ICVTrackingAA.isAssumedTracked())
2350 return indicatePessimisticFixpoint();
2351
2352 for (InternalControlVar ICV : TrackableICVs) {
2353 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2354 Optional<Value *> UniqueICVValue;
2355
2356 auto CheckReturnInst = [&](Instruction &I) {
2357 Optional<Value *> NewReplVal =
2358 ICVTrackingAA.getReplacementValue(ICV, &I, A);
2359
2360 // If we found a second ICV value there is no unique returned value.
2361 if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
2362 return false;
2363
2364 UniqueICVValue = NewReplVal;
2365
2366 return true;
2367 };
2368
2369 bool UsedAssumedInformation = false;
2370 if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
2371 UsedAssumedInformation,
2372 /* CheckBBLivenessOnly */ true))
2373 UniqueICVValue = nullptr;
2374
2375 if (UniqueICVValue == ReplVal)
2376 continue;
2377
2378 ReplVal = UniqueICVValue;
2379 Changed = ChangeStatus::CHANGED;
2380 }
2381
2382 return Changed;
2383 }
2384};
2385
2386struct AAICVTrackerCallSite : AAICVTracker {
2387 AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
2388 : AAICVTracker(IRP, A) {}
2389
2390 void initialize(Attributor &A) override {
2391 Function *F = getAnchorScope();
2392 if (!F || !A.isFunctionIPOAmendable(*F))
2393 indicatePessimisticFixpoint();
2394
2395 // We only initialize this AA for getters, so we need to know which ICV it
2396 // gets.
2397 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2398 for (InternalControlVar ICV : TrackableICVs) {
2399 auto ICVInfo = OMPInfoCache.ICVs[ICV];
2400 auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
2401 if (Getter.Declaration == getAssociatedFunction()) {
2402 AssociatedICV = ICVInfo.Kind;
2403 return;
2404 }
2405 }
2406
2407 /// Unknown ICV.
2408 indicatePessimisticFixpoint();
2409 }
2410
2411 ChangeStatus manifest(Attributor &A) override {
2412 if (!ReplVal.hasValue() || !ReplVal.getValue())
2413 return ChangeStatus::UNCHANGED;
2414
2415 A.changeValueAfterManifest(*getCtxI(), **ReplVal);
2416 A.deleteAfterManifest(*getCtxI());
2417
2418 return ChangeStatus::CHANGED;
2419 }
2420
2421 // FIXME: come up with better string.
2422 const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
2423
2424 // FIXME: come up with some stats.
2425 void trackStatistics() const override {}
2426
2427 InternalControlVar AssociatedICV;
2428 Optional<Value *> ReplVal;
2429
2430 ChangeStatus updateImpl(Attributor &A) override {
2431 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2432 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2433
2434 // We don't have any information, so we assume it changes the ICV.
2435 if (!ICVTrackingAA.isAssumedTracked())
2436 return indicatePessimisticFixpoint();
2437
2438 Optional<Value *> NewReplVal =
2439 ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
2440
2441 if (ReplVal == NewReplVal)
2442 return ChangeStatus::UNCHANGED;
2443
2444 ReplVal = NewReplVal;
2445 return ChangeStatus::CHANGED;
2446 }
2447
2448 // Return the value with which associated value can be replaced for specific
2449 // \p ICV.
2450 Optional<Value *>
2451 getUniqueReplacementValue(InternalControlVar ICV) const override {
2452 return ReplVal;
2453 }
2454};
2455
2456struct AAICVTrackerCallSiteReturned : AAICVTracker {
2457 AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
2458 : AAICVTracker(IRP, A) {}
2459
2460 // FIXME: come up with better string.
2461 const std::string getAsStr() const override {
2462 return "ICVTrackerCallSiteReturned";
2463 }
2464
2465 // FIXME: come up with some stats.
2466 void trackStatistics() const override {}
2467
2468 /// We don't manifest anything for this AA.
2469 ChangeStatus manifest(Attributor &A) override {
2470 return ChangeStatus::UNCHANGED;
2471 }
2472
2473 // Map of ICV to their values at specific program point.
2474 EnumeratedArray<Optional<Value *>, InternalControlVar,
2475 InternalControlVar::ICV___last>
2476 ICVReplacementValuesMap;
2477
2478 /// Return the value with which associated value can be replaced for specific
2479 /// \p ICV.
2480 Optional<Value *>
2481 getUniqueReplacementValue(InternalControlVar ICV) const override {
2482 return ICVReplacementValuesMap[ICV];
2483 }
2484
2485 ChangeStatus updateImpl(Attributor &A) override {
2486 ChangeStatus Changed = ChangeStatus::UNCHANGED;
2487 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2488 *this, IRPosition::returned(*getAssociatedFunction()),
2489 DepClassTy::REQUIRED);
2490
2491 // We don't have any information, so we assume it changes the ICV.
2492 if (!ICVTrackingAA.isAssumedTracked())
2493 return indicatePessimisticFixpoint();
2494
2495 for (InternalControlVar ICV : TrackableICVs) {
2496 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2497 Optional<Value *> NewReplVal =
2498 ICVTrackingAA.getUniqueReplacementValue(ICV);
2499
2500 if (ReplVal == NewReplVal)
2501 continue;
2502
2503 ReplVal = NewReplVal;
2504 Changed = ChangeStatus::CHANGED;
2505 }
2506 return Changed;
2507 }
2508};
2509
2510struct AAExecutionDomainFunction : public AAExecutionDomain {
2511 AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
2512 : AAExecutionDomain(IRP, A) {}
2513
2514 const std::string getAsStr() const override {
2515 return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
2516 "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
2517 }
2518
2519 /// See AbstractAttribute::trackStatistics().
2520 void trackStatistics() const override {}
2521
2522 void initialize(Attributor &A) override {
2523 Function *F = getAnchorScope();
2524 for (const auto &BB : *F)
2525 SingleThreadedBBs.insert(&BB);
2526 NumBBs = SingleThreadedBBs.size();
2527 }
2528
2529 ChangeStatus manifest(Attributor &A) override {
2530 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs
) dbgs() << TAG << " Basic block @" << getAnchorScope
()->getName() << " " << BB->getName() <<
" is executed by a single thread.\n"; }; } } while (false)
2531 for (const BasicBlock *BB : SingleThreadedBBs)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs
) dbgs() << TAG << " Basic block @" << getAnchorScope
()->getName() << " " << BB->getName() <<
" is executed by a single thread.\n"; }; } } while (false)
2532 dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs
) dbgs() << TAG << " Basic block @" << getAnchorScope
()->getName() << " " << BB->getName() <<
" is executed by a single thread.\n"; }; } } while (false)
2533 << BB->getName() << " is executed by a single thread.\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs
) dbgs() << TAG << " Basic block @" << getAnchorScope
()->getName() << " " << BB->getName() <<
" is executed by a single thread.\n"; }; } } while (false)
2534 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { { for (const BasicBlock *BB : SingleThreadedBBs
) dbgs() << TAG << " Basic block @" << getAnchorScope
()->getName() << " " << BB->getName() <<
" is executed by a single thread.\n"; }; } } while (false)
;
2535 return ChangeStatus::UNCHANGED;
2536 }
2537
2538 ChangeStatus updateImpl(Attributor &A) override;
2539
2540 /// Check if an instruction is executed by a single thread.
2541 bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
2542 return isExecutedByInitialThreadOnly(*I.getParent());
2543 }
2544
2545 bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
2546 return isValidState() && SingleThreadedBBs.contains(&BB);
2547 }
2548
2549 /// Set of basic blocks that are executed by a single thread.
2550 DenseSet<const BasicBlock *> SingleThreadedBBs;
2551
2552 /// Total number of basic blocks in this function.
2553 long unsigned NumBBs;
2554};
2555
2556ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
2557 Function *F = getAnchorScope();
2558 ReversePostOrderTraversal<Function *> RPOT(F);
2559 auto NumSingleThreadedBBs = SingleThreadedBBs.size();
2560
2561 bool AllCallSitesKnown;
2562 auto PredForCallSite = [&](AbstractCallSite ACS) {
2563 const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
2564 *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
2565 DepClassTy::REQUIRED);
2566 return ACS.isDirectCall() &&
2567 ExecutionDomainAA.isExecutedByInitialThreadOnly(
2568 *ACS.getInstruction());
2569 };
2570
2571 if (!A.checkForAllCallSites(PredForCallSite, *this,
2572 /* RequiresAllCallSites */ true,
2573 AllCallSitesKnown))
2574 SingleThreadedBBs.erase(&F->getEntryBlock());
2575
2576 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2577 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2578
2579 // Check if the edge into the successor block contains a condition that only
2580 // lets the main thread execute it.
2581 auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
2582 if (!Edge || !Edge->isConditional())
2583 return false;
2584 if (Edge->getSuccessor(0) != SuccessorBB)
2585 return false;
2586
2587 auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
2588 if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
2589 return false;
2590
2591 ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
2592 if (!C)
2593 return false;
2594
2595 // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
2596 if (C->isAllOnesValue()) {
2597 auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
2598 CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
2599 if (!CB)
2600 return false;
2601 const int InitModeArgNo = 1;
2602 auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
2603 return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
2604 }
2605
2606 if (C->isZero()) {
2607 // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
2608 if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2609 if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
2610 return true;
2611
2612 // Match: 0 == llvm.amdgcn.workitem.id.x()
2613 if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2614 if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
2615 return true;
2616 }
2617
2618 return false;
2619 };
2620
2621 // Merge all the predecessor states into the current basic block. A basic
2622 // block is executed by a single thread if all of its predecessors are.
2623 auto MergePredecessorStates = [&](BasicBlock *BB) {
2624 if (pred_empty(BB))
2625 return SingleThreadedBBs.contains(BB);
2626
2627 bool IsInitialThread = true;
2628 for (BasicBlock *PredBB : predecessors(BB)) {
2629 if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
2630 BB))
2631 IsInitialThread &= SingleThreadedBBs.contains(PredBB);
2632 }
2633
2634 return IsInitialThread;
2635 };
2636
2637 for (auto *BB : RPOT) {
2638 if (!MergePredecessorStates(BB))
2639 SingleThreadedBBs.erase(BB);
2640 }
2641
2642 return (NumSingleThreadedBBs == SingleThreadedBBs.size())
2643 ? ChangeStatus::UNCHANGED
2644 : ChangeStatus::CHANGED;
2645}
2646
2647/// Try to replace memory allocation calls called by a single thread with a
2648/// static buffer of shared memory.
2649struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
2650 using Base = StateWrapper<BooleanState, AbstractAttribute>;
2651 AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2652
2653 /// Create an abstract attribute view for the position \p IRP.
2654 static AAHeapToShared &createForPosition(const IRPosition &IRP,
2655 Attributor &A);
2656
2657 /// Returns true if HeapToShared conversion is assumed to be possible.
2658 virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;
2659
2660 /// Returns true if HeapToShared conversion is assumed and the CB is a
2661 /// callsite to a free operation to be removed.
2662 virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;
2663
2664 /// See AbstractAttribute::getName().
2665 const std::string getName() const override { return "AAHeapToShared"; }
2666
2667 /// See AbstractAttribute::getIdAddr().
2668 const char *getIdAddr() const override { return &ID; }
2669
2670 /// This function should return true if the type of the \p AA is
2671 /// AAHeapToShared.
2672 static bool classof(const AbstractAttribute *AA) {
2673 return (AA->getIdAddr() == &ID);
2674 }
2675
2676 /// Unique ID (due to the unique address)
2677 static const char ID;
2678};
2679
2680struct AAHeapToSharedFunction : public AAHeapToShared {
2681 AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
2682 : AAHeapToShared(IRP, A) {}
2683
2684 const std::string getAsStr() const override {
2685 return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
2686 " malloc calls eligible.";
2687 }
2688
2689 /// See AbstractAttribute::trackStatistics().
2690 void trackStatistics() const override {}
2691
2692 /// This functions finds free calls that will be removed by the
2693 /// HeapToShared transformation.
2694 void findPotentialRemovedFreeCalls(Attributor &A) {
2695 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2696 auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
2697
2698 PotentialRemovedFreeCalls.clear();
2699 // Update free call users of found malloc calls.
2700 for (CallBase *CB : MallocCalls) {
2701 SmallVector<CallBase *, 4> FreeCalls;
2702 for (auto *U : CB->users()) {
2703 CallBase *C = dyn_cast<CallBase>(U);
2704 if (C && C->getCalledFunction() == FreeRFI.Declaration)
2705 FreeCalls.push_back(C);
2706 }
2707
2708 if (FreeCalls.size() != 1)
2709 continue;
2710
2711 PotentialRemovedFreeCalls.insert(FreeCalls.front());
2712 }
2713 }
2714
2715 void initialize(Attributor &A) override {
2716 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2717 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
2718
2719 for (User *U : RFI.Declaration->users())
2720 if (CallBase *CB = dyn_cast<CallBase>(U))
2721 MallocCalls.insert(CB);
2722
2723 findPotentialRemovedFreeCalls(A);
2724 }
2725
2726 bool isAssumedHeapToShared(CallBase &CB) const override {
2727 return isValidState() && MallocCalls.count(&CB);
2728 }
2729
2730 bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
2731 return isValidState() && PotentialRemovedFreeCalls.count(&CB);
2732 }
2733
2734 ChangeStatus manifest(Attributor &A) override {
2735 if (MallocCalls.empty())
2736 return ChangeStatus::UNCHANGED;
2737
2738 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2739 auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
2740
2741 Function *F = getAnchorScope();
2742 auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
2743 DepClassTy::OPTIONAL);
2744
2745 ChangeStatus Changed = ChangeStatus::UNCHANGED;
2746 for (CallBase *CB : MallocCalls) {
2747 // Skip replacing this if HeapToStack has already claimed it.
2748 if (HS && HS->isAssumedHeapToStack(*CB))
2749 continue;
2750
2751 // Find the unique free call to remove it.
2752 SmallVector<CallBase *, 4> FreeCalls;
2753 for (auto *U : CB->users()) {
2754 CallBase *C = dyn_cast<CallBase>(U);
2755 if (C && C->getCalledFunction() == FreeCall.Declaration)
2756 FreeCalls.push_back(C);
2757 }
2758 if (FreeCalls.size() != 1)
2759 continue;
2760
2761 ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
2762
2763 LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Replace globalization call "
<< *CB << " with " << AllocSize->getZExtValue
() << " bytes of shared memory\n"; } } while (false)
2764 << " with " << AllocSize->getZExtValue()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Replace globalization call "
<< *CB << " with " << AllocSize->getZExtValue
() << " bytes of shared memory\n"; } } while (false)
2765 << " bytes of shared memory\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Replace globalization call "
<< *CB << " with " << AllocSize->getZExtValue
() << " bytes of shared memory\n"; } } while (false)
;
2766
2767 // Create a new shared memory buffer of the same size as the allocation
2768 // and replace all the uses of the original allocation with it.
2769 Module *M = CB->getModule();
2770 Type *Int8Ty = Type::getInt8Ty(M->getContext());
2771 Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
2772 auto *SharedMem = new GlobalVariable(
2773 *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
2774 UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
2775 GlobalValue::NotThreadLocal,
2776 static_cast<unsigned>(AddressSpace::Shared));
2777 auto *NewBuffer =
2778 ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
2779
2780 auto Remark = [&](OptimizationRemark OR) {
2781 return OR << "Replaced globalized variable with "
2782 << ore::NV("SharedMemory", AllocSize->getZExtValue())
2783 << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
2784 << "of shared memory.";
2785 };
2786 A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);
2787
2788 SharedMem->setAlignment(MaybeAlign(32));
2789
2790 A.changeValueAfterManifest(*CB, *NewBuffer);
2791 A.deleteAfterManifest(*CB);
2792 A.deleteAfterManifest(*FreeCalls.front());
2793
2794 NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
2795 Changed = ChangeStatus::CHANGED;
2796 }
2797
2798 return Changed;
2799 }
2800
2801 ChangeStatus updateImpl(Attributor &A) override {
2802 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2803 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
2804 Function *F = getAnchorScope();
2805
2806 auto NumMallocCalls = MallocCalls.size();
2807
2808 // Only consider malloc calls executed by a single thread with a constant.
2809 for (User *U : RFI.Declaration->users()) {
2810 const auto &ED = A.getAAFor<AAExecutionDomain>(
2811 *this, IRPosition::function(*F), DepClassTy::REQUIRED);
2812 if (CallBase *CB = dyn_cast<CallBase>(U))
2813 if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
2814 !ED.isExecutedByInitialThreadOnly(*CB))
2815 MallocCalls.erase(CB);
2816 }
2817
2818 findPotentialRemovedFreeCalls(A);
2819
2820 if (NumMallocCalls != MallocCalls.size())
2821 return ChangeStatus::CHANGED;
2822
2823 return ChangeStatus::UNCHANGED;
2824 }
2825
2826 /// Collection of all malloc calls in a function.
2827 SmallPtrSet<CallBase *, 4> MallocCalls;
2828 /// Collection of potentially removed free calls in a function.
2829 SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
2830};
2831
2832struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
2833 using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
2834 AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2835
2836 /// Statistics are tracked as part of manifest for now.
2837 void trackStatistics() const override {}
2838
2839 /// See AbstractAttribute::getAsStr()
2840 const std::string getAsStr() const override {
2841 if (!isValidState())
2842 return "<invalid>";
2843 return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
2844 : "generic") +
2845 std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
2846 : "") +
2847 std::string(" #PRs: ") +
2848 (ReachedKnownParallelRegions.isValidState()
2849 ? std::to_string(ReachedKnownParallelRegions.size())
2850 : "<invalid>") +
2851 ", #Unknown PRs: " +
2852 (ReachedUnknownParallelRegions.isValidState()
2853 ? std::to_string(ReachedUnknownParallelRegions.size())
2854 : "<invalid>") +
2855 ", #Reaching Kernels: " +
2856 (ReachingKernelEntries.isValidState()
2857 ? std::to_string(ReachingKernelEntries.size())
2858 : "<invalid>");
2859 }
2860
2861 /// Create an abstract attribute biew for the position \p IRP.
2862 static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
2863
2864 /// See AbstractAttribute::getName()
2865 const std::string getName() const override { return "AAKernelInfo"; }
2866
2867 /// See AbstractAttribute::getIdAddr()
2868 const char *getIdAddr() const override { return &ID; }
2869
2870 /// This function should return true if the type of the \p AA is AAKernelInfo
2871 static bool classof(const AbstractAttribute *AA) {
2872 return (AA->getIdAddr() == &ID);
2873 }
2874
2875 static const char ID;
2876};
2877
2878/// The function kernel info abstract attribute, basically, what can we say
2879/// about a function with regards to the KernelInfoState.
2880struct AAKernelInfoFunction : AAKernelInfo {
/// Build the function-level attribute for \p IRP; both arguments are passed
/// through unchanged to the AAKernelInfo constructor.
2881 AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
2882 : AAKernelInfo(IRP, A) {}
2883
/// Instructions that changeToSPMDMode has already assigned to a guarded
/// region; it consults this set to avoid guarding an instruction twice.
2884 SmallPtrSet<Instruction *, 4> GuardedInstructions;
2885
/// Mutable access to the set of already guarded instructions.
2886 SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
2887 return GuardedInstructions;
2888 }
2889
2890 /// See AbstractAttribute::initialize(...).
2891 void initialize(Attributor &A) override {
2892 // This is a high-level transform that might change the constant arguments
2893 // of the init and dinit calls. We need to tell the Attributor about this
2894 // to avoid other parts using the current constant value for simpliication.
2895 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2896
2897 Function *Fn = getAnchorScope();
2898 if (!OMPInfoCache.Kernels.count(Fn))
2899 return;
2900
2901 // Add itself to the reaching kernel and set IsKernelEntry.
2902 ReachingKernelEntries.insert(Fn);
2903 IsKernelEntry = true;
2904
2905 OMPInformationCache::RuntimeFunctionInfo &InitRFI =
2906 OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2907 OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
2908 OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
2909
2910 // For kernels we perform more initialization work, first we find the init
2911 // and deinit calls.
2912 auto StoreCallBase = [](Use &U,
2913 OMPInformationCache::RuntimeFunctionInfo &RFI,
2914 CallBase *&Storage) {
2915 CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
2916 assert(CB &&(static_cast <bool> (CB && "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!"
) ? void (0) : __assert_fail ("CB && \"Unexpected use of __kmpc_target_init or __kmpc_target_deinit!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 2917, __extension__ __PRETTY_FUNCTION__))
2917 "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!")(static_cast <bool> (CB && "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!"
) ? void (0) : __assert_fail ("CB && \"Unexpected use of __kmpc_target_init or __kmpc_target_deinit!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 2917, __extension__ __PRETTY_FUNCTION__))
;
2918 assert(!Storage &&(static_cast <bool> (!Storage && "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!"
) ? void (0) : __assert_fail ("!Storage && \"Multiple uses of __kmpc_target_init or __kmpc_target_deinit!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 2919, __extension__ __PRETTY_FUNCTION__))
2919 "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!")(static_cast <bool> (!Storage && "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!"
) ? void (0) : __assert_fail ("!Storage && \"Multiple uses of __kmpc_target_init or __kmpc_target_deinit!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 2919, __extension__ __PRETTY_FUNCTION__))
;
2920 Storage = CB;
2921 return false;
2922 };
2923 InitRFI.foreachUse(
2924 [&](Use &U, Function &) {
2925 StoreCallBase(U, InitRFI, KernelInitCB);
2926 return false;
2927 },
2928 Fn);
2929 DeinitRFI.foreachUse(
2930 [&](Use &U, Function &) {
2931 StoreCallBase(U, DeinitRFI, KernelDeinitCB);
2932 return false;
2933 },
2934 Fn);
2935
2936 // Ignore kernels without initializers such as global constructors.
2937 if (!KernelInitCB || !KernelDeinitCB) {
2938 indicateOptimisticFixpoint();
2939 return;
2940 }
2941
2942 // For kernels we might need to initialize/finalize the IsSPMD state and
2943 // we need to register a simplification callback so that the Attributor
2944 // knows the constant arguments to __kmpc_target_init and
2945 // __kmpc_target_deinit might actually change.
2946
2947 Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
2948 [&](const IRPosition &IRP, const AbstractAttribute *AA,
2949 bool &UsedAssumedInformation) -> Optional<Value *> {
2950 // IRP represents the "use generic state machine" argument of an
2951 // __kmpc_target_init call. We will answer this one with the internal
2952 // state. As long as we are not in an invalid state, we will create a
2953 // custom state machine so the value should be a `i1 false`. If we are
2954 // in an invalid state, we won't change the value that is in the IR.
2955 if (!ReachedKnownParallelRegions.isValidState())
2956 return nullptr;
2957 // If we have disabled state machine rewrites, don't make a custom one.
2958 if (DisableOpenMPOptStateMachineRewrite)
2959 return nullptr;
2960 if (AA)
2961 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
2962 UsedAssumedInformation = !isAtFixpoint();
2963 auto *FalseVal =
2964 ConstantInt::getBool(IRP.getAnchorValue().getContext(), 0);
2965 return FalseVal;
2966 };
2967
2968 Attributor::SimplifictionCallbackTy ModeSimplifyCB =
2969 [&](const IRPosition &IRP, const AbstractAttribute *AA,
2970 bool &UsedAssumedInformation) -> Optional<Value *> {
2971 // IRP represents the "SPMDCompatibilityTracker" argument of an
2972 // __kmpc_target_init or
2973 // __kmpc_target_deinit call. We will answer this one with the internal
2974 // state.
2975 if (!SPMDCompatibilityTracker.isValidState())
2976 return nullptr;
2977 if (!SPMDCompatibilityTracker.isAtFixpoint()) {
2978 if (AA)
2979 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
2980 UsedAssumedInformation = true;
2981 } else {
2982 UsedAssumedInformation = false;
2983 }
2984 auto *Val = ConstantInt::getSigned(
2985 IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
2986 SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
2987 : OMP_TGT_EXEC_MODE_GENERIC);
2988 return Val;
2989 };
2990
2991 Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
2992 [&](const IRPosition &IRP, const AbstractAttribute *AA,
2993 bool &UsedAssumedInformation) -> Optional<Value *> {
2994 // IRP represents the "RequiresFullRuntime" argument of an
2995 // __kmpc_target_init or __kmpc_target_deinit call. We will answer this
2996 // one with the internal state of the SPMDCompatibilityTracker, so if
2997 // generic then true, if SPMD then false.
2998 if (!SPMDCompatibilityTracker.isValidState())
2999 return nullptr;
3000 if (!SPMDCompatibilityTracker.isAtFixpoint()) {
3001 if (AA)
3002 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3003 UsedAssumedInformation = true;
3004 } else {
3005 UsedAssumedInformation = false;
3006 }
3007 auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
3008 !SPMDCompatibilityTracker.isAssumed());
3009 return Val;
3010 };
3011
3012 constexpr const int InitModeArgNo = 1;
3013 constexpr const int DeinitModeArgNo = 1;
3014 constexpr const int InitUseStateMachineArgNo = 2;
3015 constexpr const int InitRequiresFullRuntimeArgNo = 3;
3016 constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
3017 A.registerSimplificationCallback(
3018 IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
3019 StateMachineSimplifyCB);
3020 A.registerSimplificationCallback(
3021 IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
3022 ModeSimplifyCB);
3023 A.registerSimplificationCallback(
3024 IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
3025 ModeSimplifyCB);
3026 A.registerSimplificationCallback(
3027 IRPosition::callsite_argument(*KernelInitCB,
3028 InitRequiresFullRuntimeArgNo),
3029 IsGenericModeSimplifyCB);
3030 A.registerSimplificationCallback(
3031 IRPosition::callsite_argument(*KernelDeinitCB,
3032 DeinitRequiresFullRuntimeArgNo),
3033 IsGenericModeSimplifyCB);
3034
3035 // Check if we know we are in SPMD-mode already.
3036 ConstantInt *ModeArg =
3037 dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
3038 if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
3039 SPMDCompatibilityTracker.indicateOptimisticFixpoint();
3040 // This is a generic region but SPMDization is disabled so stop tracking.
3041 else if (DisableOpenMPOptSPMDization)
3042 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3043 }
3044
3045 /// Sanitize the string \p S such that it is a suitable global symbol name.
static std::string sanitizeForGlobalName(std::string S) {
  // Only ASCII letters, digits and '_' are kept; every other character is
  // replaced by '.'. The length of the string does not change.
  for (char &C : S) {
    const bool IsLower = C >= 'a' && C <= 'z';
    const bool IsUpper = C >= 'A' && C <= 'Z';
    const bool IsDigit = C >= '0' && C <= '9';
    const bool IsAllowed = IsLower || IsUpper || IsDigit || C == '_';
    if (!IsAllowed)
      C = '.';
  }
  return S;
}
3056
3057 /// Modify the IR based on the KernelInfoState as the fixpoint iteration is
3058 /// finished now.
3059 ChangeStatus manifest(Attributor &A) override {
3060 // If we are not looking at a kernel with __kmpc_target_init and
3061 // __kmpc_target_deinit call we cannot actually manifest the information.
3062 if (!KernelInitCB || !KernelDeinitCB)
3063 return ChangeStatus::UNCHANGED;
3064
3065 // If we can we change the execution mode to SPMD-mode otherwise we build a
3066 // custom state machine.
3067 ChangeStatus Changed = ChangeStatus::UNCHANGED;
3068 if (!changeToSPMDMode(A, Changed))
3069 return buildCustomStateMachine(A);
3070
3071 return Changed;
3072 }
3073
/// Try to switch this kernel to SPMD mode.
///
/// If SPMDCompatibilityTracker is not assumed, analysis remarks (OMP121) are
/// emitted for the recorded instructions and the IR is left untouched. If the
/// initializer of the kernel's "<name>_exec_mode" global is not
/// OMP_TGT_EXEC_MODE_GENERIC, the IR is left untouched as well. Otherwise the
/// instructions held by SPMDCompatibilityTracker are wrapped in guarded
/// regions, the exec mode global is updated, and the constant arguments of
/// the init/deinit calls are scheduled for replacement.
///
/// \param A       Attributor used for remarks, registration of new blocks and
///                deferred use changes.
/// \param Changed Set to ChangeStatus::CHANGED right before the IR is
///                modified; not written on the early-return paths.
/// \returns false if the tracker is not assumed, true if the exec mode is
///          already non-generic or the transformation was performed.
3074 bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
3075 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3076
3077 if (!SPMDCompatibilityTracker.isAssumed()) {
3078 for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
// Null entries carry no instruction to report on.
3079 if (!NonCompatibleI)
3080 continue;
3081
3082 // Skip diagnostics on calls to known OpenMP runtime functions for now.
3083 if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
3084 if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
3085 continue;
3086
3087 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
3088 ORA << "Value has potential side effects preventing SPMD-mode "
3089 "execution";
3090 if (isa<CallBase>(NonCompatibleI)) {
3091 ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to "
3092 "the called function to override";
3093 }
3094 return ORA << ".";
3095 };
3096 A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
3097 Remark);
3098
3099 LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "SPMD-incompatible side-effect: "
<< *NonCompatibleI << "\n"; } } while (false)
3100 << *NonCompatibleI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "SPMD-incompatible side-effect: "
<< *NonCompatibleI << "\n"; } } while (false)
;
3101 }
3102
3103 return false;
3104 }
3105
3106 // Check if the kernel is already in SPMD mode, if so, return success.
// The mode is read from the module global named "<kernel name>_exec_mode".
3107 Function *Kernel = getAnchorScope();
3108 GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
3109 (Kernel->getName() + "_exec_mode").str());
3110 assert(ExecMode && "Kernel without exec mode?")(static_cast <bool> (ExecMode && "Kernel without exec mode?"
) ? void (0) : __assert_fail ("ExecMode && \"Kernel without exec mode?\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3110, __extension__ __PRETTY_FUNCTION__))
;
3111 assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!")(static_cast <bool> (ExecMode->getInitializer() &&
"ExecMode doesn't have initializer!") ? void (0) : __assert_fail
("ExecMode->getInitializer() && \"ExecMode doesn't have initializer!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3111, __extension__ __PRETTY_FUNCTION__))
;
3112
3113 // Set the global exec mode flag to indicate SPMD-Generic mode.
3114 assert(isa<ConstantInt>(ExecMode->getInitializer()) &&(static_cast <bool> (isa<ConstantInt>(ExecMode->
getInitializer()) && "ExecMode is not an integer!") ?
void (0) : __assert_fail ("isa<ConstantInt>(ExecMode->getInitializer()) && \"ExecMode is not an integer!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3115, __extension__ __PRETTY_FUNCTION__))
3115 "ExecMode is not an integer!")(static_cast <bool> (isa<ConstantInt>(ExecMode->
getInitializer()) && "ExecMode is not an integer!") ?
void (0) : __assert_fail ("isa<ConstantInt>(ExecMode->getInitializer()) && \"ExecMode is not an integer!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3115, __extension__ __PRETTY_FUNCTION__))
;
3116 const int8_t ExecModeVal =
3117 cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
// Any value other than generic is reported as success without changes.
3118 if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
3119 return true;
3120
3121 // We will now unconditionally modify the IR, indicate a change.
3122 Changed = ChangeStatus::CHANGED;
3123
// Helper that wraps the instructions from RegionStartI to RegionEndI (both
// in the same basic block) in a region that is only entered when the
// hardware thread id is zero, followed by one or two barriers.
3124 auto CreateGuardedRegion = [&](Instruction *RegionStartI,
3125 Instruction *RegionEndI) {
// These stay null and are passed to every SplitBlock call below.
3126 LoopInfo *LI = nullptr;
3127 DominatorTree *DT = nullptr;
3128 MemorySSAUpdater *MSU = nullptr;
3129 using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3130
3131 BasicBlock *ParentBB = RegionStartI->getParent();
3132 Function *Fn = ParentBB->getParent();
3133 Module &M = *Fn->getParent();
3134
3135 // Create all the blocks and logic.
3136 // ParentBB:
3137 // goto RegionCheckTidBB
3138 // RegionCheckTidBB:
3139 // Tid = __kmpc_hardware_thread_id()
3140 // if (Tid != 0)
3141 // goto RegionBarrierBB
3142 // RegionStartBB:
3143 // <execute instructions guarded>
3144 // goto RegionEndBB
3145 // RegionEndBB:
3146 // <store escaping values to shared mem>
3147 // goto RegionBarrierBB
3148 // RegionBarrierBB:
3149 // __kmpc_simple_barrier_spmd()
3150 // // second barrier is omitted if lacking escaping values.
3151 // <load escaping values from shared mem>
3152 // __kmpc_simple_barrier_spmd()
3153 // goto RegionExitBB
3154 // RegionExitBB:
3155 // <execute rest of instructions>
3156
// The block is split after the region end first and at the region start
// last, so the three trailing blocks exist before RegionStartBB is cut off.
3157 BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
3158 DT, LI, MSU, "region.guarded.end");
3159 BasicBlock *RegionBarrierBB =
3160 SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
3161 MSU, "region.barrier");
3162 BasicBlock *RegionExitBB =
3163 SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
3164 DT, LI, MSU, "region.exit");
3165 BasicBlock *RegionStartBB =
3166 SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
3167
3168 assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&(static_cast <bool> (ParentBB->getUniqueSuccessor() ==
RegionStartBB && "Expected a different CFG") ? void (
0) : __assert_fail ("ParentBB->getUniqueSuccessor() == RegionStartBB && \"Expected a different CFG\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3169, __extension__ __PRETTY_FUNCTION__))
3169 "Expected a different CFG")(static_cast <bool> (ParentBB->getUniqueSuccessor() ==
RegionStartBB && "Expected a different CFG") ? void (
0) : __assert_fail ("ParentBB->getUniqueSuccessor() == RegionStartBB && \"Expected a different CFG\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3169, __extension__ __PRETTY_FUNCTION__))
;
3170
3171 BasicBlock *RegionCheckTidBB = SplitBlock(
3172 ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
3173
3174 // Register basic blocks with the Attributor.
3175 A.registerManifestAddedBasicBlock(*RegionEndBB);
3176 A.registerManifestAddedBasicBlock(*RegionBarrierBB);
3177 A.registerManifestAddedBasicBlock(*RegionExitBB);
3178 A.registerManifestAddedBasicBlock(*RegionStartBB);
3179 A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
3180
3181 bool HasBroadcastValues = false;
3182 // Find escaping outputs from the guarded region to outside users and
3183 // broadcast their values to them.
3184 for (Instruction &I : *RegionStartBB) {
3185 SmallPtrSet<Instruction *, 4> OutsideUsers;
3186 for (User *Usr : I.users()) {
3187 Instruction &UsrI = *cast<Instruction>(Usr);
3188 if (UsrI.getParent() != RegionStartBB)
3189 OutsideUsers.insert(&UsrI);
3190 }
3191
3192 if (OutsideUsers.empty())
3193 continue;
3194
3195 HasBroadcastValues = true;
3196
3197 // Emit a global variable in shared memory to store the broadcasted
3198 // value.
3199 auto *SharedMem = new GlobalVariable(
3200 M, I.getType(), /* IsConstant */ false,
3201 GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
3202 sanitizeForGlobalName(
3203 (I.getName() + ".guarded.output.alloc").str()),
3204 nullptr, GlobalValue::NotThreadLocal,
3205 static_cast<unsigned>(AddressSpace::Shared));
3206
3207 // Emit a store instruction to update the value.
3208 new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
3209
3210 LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
3211 I.getName() + ".guarded.output.load",
3212 RegionBarrierBB->getTerminator());
3213
3214 // Emit a load instruction and replace uses of the output value.
3215 for (Instruction *UsrI : OutsideUsers)
3216 UsrI->replaceUsesOfWith(&I, LoadI);
3217 }
3218
3219 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3220
3221 // Go to tid check BB in ParentBB.
3222 const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
3223 ParentBB->getTerminator()->eraseFromParent();
3224 OpenMPIRBuilder::LocationDescription Loc(
3225 InsertPointTy(ParentBB, ParentBB->end()), DL);
3226 OMPInfoCache.OMPBuilder.updateToLocation(Loc);
3227 auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc);
3228 Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr);
3229 BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
3230
3231 // Add check for Tid in RegionCheckTidBB
3232 RegionCheckTidBB->getTerminator()->eraseFromParent();
3233 OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
3234 InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
3235 OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
3236 FunctionCallee HardwareTidFn =
3237 OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3238 M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
3239 Value *Tid =
3240 OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
3241 Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
3242 OMPInfoCache.OMPBuilder.Builder
3243 .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
3244 ->setDebugLoc(DL);
3245
3246 // First barrier for synchronization, ensures main thread has updated
3247 // values.
3248 FunctionCallee BarrierFn =
3249 OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3250 M, OMPRTL___kmpc_barrier_simple_spmd);
3251 OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
3252 RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
3253 OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
3254 ->setDebugLoc(DL);
3255
3256 // Second barrier ensures workers have read broadcast values.
3257 if (HasBroadcastValues)
3258 CallInst::Create(BarrierFn, {Ident, Tid}, "",
3259 RegionBarrierBB->getTerminator())
3260 ->setDebugLoc(DL);
3261 };
3262
// First pass: each basic block holding a tracked instruction is processed
// once. The block is walked bottom-up, starting above its last instruction.
// Instructions without side effects or memory reads, and regular calls to
// __kmpc_alloc_shared, are stepped over. A tracked instruction without users
// is paired with the tracked instruction found below it and later moved
// directly in front of that one; anything else breaks the chain.
3263 auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
3264 SmallPtrSet<BasicBlock *, 8> Visited;
3265 for (Instruction *GuardedI : SPMDCompatibilityTracker) {
3266 BasicBlock *BB = GuardedI->getParent();
3267 if (!Visited.insert(BB).second)
3268 continue;
3269
3270 SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
3271 Instruction *LastEffect = nullptr;
3272 BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
3273 while (++IP != IPEnd) {
3274 if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
3275 continue;
3276 Instruction *I = &*IP;
3277 if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
3278 continue;
3279 if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
3280 LastEffect = nullptr;
3281 continue;
3282 }
3283 if (LastEffect)
3284 Reorders.push_back({I, LastEffect});
3285 LastEffect = &*IP;
3286 }
// The moves are applied only after the walk over the block is complete.
3287 for (auto &Reorder : Reorders)
3288 Reorder.first->moveBefore(Reorder.second);
3289 }
3290
// Second pass: collect runs of consecutive tracked instructions per block as
// (first, last) pairs. The regions are only created after all of them have
// been collected.
3291 SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
3292
3293 for (Instruction *GuardedI : SPMDCompatibilityTracker) {
3294 BasicBlock *BB = GuardedI->getParent();
// The set of already guarded instructions lives in the AAKernelInfoFunction
// of the function that contains the instruction.
3295 auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
3296 IRPosition::function(*GuardedI->getFunction()), nullptr,
3297 DepClassTy::NONE);
3298 assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo")(static_cast <bool> (CalleeAA != nullptr && "Expected Callee AAKernelInfo"
) ? void (0) : __assert_fail ("CalleeAA != nullptr && \"Expected Callee AAKernelInfo\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3298, __extension__ __PRETTY_FUNCTION__))
;
3299 auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
3300 // Continue if instruction is already guarded.
3301 if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
3302 continue;
3303
3304 Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
3305 for (Instruction &I : *BB) {
3306 // If instruction I needs to be guarded update the guarded region
3307 // bounds.
3308 if (SPMDCompatibilityTracker.contains(&I)) {
3309 CalleeAAFunction.getGuardedInstructions().insert(&I);
3310 if (GuardedRegionStart)
3311 GuardedRegionEnd = &I;
3312 else
3313 GuardedRegionStart = GuardedRegionEnd = &I;
3314
3315 continue;
3316 }
3317
3318 // Instruction I does not need guarding, store
3319 // any region found and reset bounds.
// Note that a run is only stored once an untracked instruction follows it.
3320 if (GuardedRegionStart) {
3321 GuardedRegions.push_back(
3322 std::make_pair(GuardedRegionStart, GuardedRegionEnd));
3323 GuardedRegionStart = nullptr;
3324 GuardedRegionEnd = nullptr;
3325 }
3326 }
3327 }
3328
3329 for (auto &GR : GuardedRegions)
3330 CreateGuardedRegion(GR.first, GR.second);
3331
3332 // Adjust the global exec mode flag that tells the runtime what mode this
3333 // kernel is executed in.
3334 assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&(static_cast <bool> (ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC
&& "Initially non-SPMD kernel has SPMD exec mode!") ?
void (0) : __assert_fail ("ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && \"Initially non-SPMD kernel has SPMD exec mode!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3335, __extension__ __PRETTY_FUNCTION__))
3335 "Initially non-SPMD kernel has SPMD exec mode!")(static_cast <bool> (ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC
&& "Initially non-SPMD kernel has SPMD exec mode!") ?
void (0) : __assert_fail ("ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && \"Initially non-SPMD kernel has SPMD exec mode!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3335, __extension__ __PRETTY_FUNCTION__))
;
3336 ExecMode->setInitializer(
3337 ConstantInt::get(ExecMode->getInitializer()->getType(),
3338 ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
3339
3340 // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
// Operand positions of the arguments replaced below.
3341 const int InitModeArgNo = 1;
3342 const int DeinitModeArgNo = 1;
3343 const int InitUseStateMachineArgNo = 2;
3344 const int InitRequiresFullRuntimeArgNo = 3;
3345 const int DeinitRequiresFullRuntimeArgNo = 2;
3346
// Mode arguments become OMP_TGT_EXEC_MODE_SPMD; the state machine and full
// runtime arguments become false.
3347 auto &Ctx = getAnchorValue().getContext();
3348 A.changeUseAfterManifest(
3349 KernelInitCB->getArgOperandUse(InitModeArgNo),
3350 *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
3351 OMP_TGT_EXEC_MODE_SPMD));
3352 A.changeUseAfterManifest(
3353 KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
3354 *ConstantInt::getBool(Ctx, 0));
3355 A.changeUseAfterManifest(
3356 KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
3357 *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
3358 OMP_TGT_EXEC_MODE_SPMD));
3359 A.changeUseAfterManifest(
3360 KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
3361 *ConstantInt::getBool(Ctx, 0));
3362 A.changeUseAfterManifest(
3363 KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
3364 *ConstantInt::getBool(Ctx, 0));
3365
3366 ++NumOpenMPTargetRegionKernelsSPMD;
3367
3368 auto Remark = [&](OptimizationRemark OR) {
3369 return OR << "Transformed generic-mode kernel to SPMD-mode.";
3370 };
3371 A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
3372 return true;
3373 };
3374
3375 ChangeStatus buildCustomStateMachine(Attributor &A) {
3376 // If we have disabled state machine rewrites, don't make a custom one
3377 if (DisableOpenMPOptStateMachineRewrite)
3378 return ChangeStatus::UNCHANGED;
3379
3380 // Don't rewrite the state machine if we are not in a valid state.
3381 if (!ReachedKnownParallelRegions.isValidState())
3382 return ChangeStatus::UNCHANGED;
3383
3384 const int InitModeArgNo = 1;
3385 const int InitUseStateMachineArgNo = 2;
3386
3387 // Check if the current configuration is non-SPMD and generic state machine.
3388 // If we already have SPMD mode or a custom state machine we do not need to
3389 // go any further. If it is anything but a constant something is weird and
3390 // we give up.
3391 ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
3392 KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
3393 ConstantInt *Mode =
3394 dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
3395
3396 // If we are stuck with generic mode, try to create a custom device (=GPU)
3397 // state machine which is specialized for the parallel regions that are
3398 // reachable by the kernel.
3399 if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
3400 (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
3401 return ChangeStatus::UNCHANGED;
3402
3403 // If not SPMD mode, indicate we use a custom state machine now.
3404 auto &Ctx = getAnchorValue().getContext();
3405 auto *FalseVal = ConstantInt::getBool(Ctx, 0);
3406 A.changeUseAfterManifest(
3407 KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
3408
3409 // If we don't actually need a state machine we are done here. This can
3410 // happen if there simply are no parallel regions. In the resulting kernel
3411 // all worker threads will simply exit right away, leaving the main thread
3412 // to do the work alone.
3413 if (!mayContainParallelRegion()) {
3414 ++NumOpenMPTargetRegionKernelsWithoutStateMachine;
3415
3416 auto Remark = [&](OptimizationRemark OR) {
3417 return OR << "Removing unused state machine from generic-mode kernel.";
3418 };
3419 A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);
3420
3421 return ChangeStatus::CHANGED;
3422 }
3423
3424 // Keep track in the statistics of our new shiny custom state machine.
3425 if (ReachedUnknownParallelRegions.empty()) {
3426 ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;
3427
3428 auto Remark = [&](OptimizationRemark OR) {
3429 return OR << "Rewriting generic-mode kernel with a customized state "
3430 "machine.";
3431 };
3432 A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
3433 } else {
3434 ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;
3435
3436 auto Remark = [&](OptimizationRemarkAnalysis OR) {
3437 return OR << "Generic-mode kernel is executed with a customized state "
3438 "machine that requires a fallback.";
3439 };
3440 A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);
3441
3442 // Tell the user why we ended up with a fallback.
3443 for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
3444 if (!UnknownParallelRegionCB)
3445 continue;
3446 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
3447 return ORA << "Call may contain unknown parallel regions. Use "
3448 << "`__attribute__((assume(\"omp_no_parallelism\")))` to "
3449 "override.";
3450 };
3451 A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
3452 "OMP133", Remark);
3453 }
3454 }
3455
3456 // Create all the blocks:
3457 //
3458 // InitCB = __kmpc_target_init(...)
3459 // bool IsWorker = InitCB >= 0;
3460 // if (IsWorker) {
3461 // SMBeginBB: __kmpc_barrier_simple_generic(...);
3462 // void *WorkFn;
3463 // bool Active = __kmpc_kernel_parallel(&WorkFn);
3464 // if (!WorkFn) return;
3465 // SMIsActiveCheckBB: if (Active) {
3466 // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>)
3467 // ParFn0(...);
3468 // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>)
3469 // ParFn1(...);
3470 // ...
3471 // SMIfCascadeCurrentBB: else
3472 // ((WorkFnTy*)WorkFn)(...);
3473 // SMEndParallelBB: __kmpc_kernel_end_parallel(...);
3474 // }
3475 // SMDoneBB: __kmpc_barrier_simple_generic(...);
3476 // goto SMBeginBB;
3477 // }
3478 // UserCodeEntryBB: // user code
3479 // __kmpc_target_deinit(...)
3480 //
3481 Function *Kernel = getAssociatedFunction();
3482 assert(Kernel && "Expected an associated function!")(static_cast <bool> (Kernel && "Expected an associated function!"
) ? void (0) : __assert_fail ("Kernel && \"Expected an associated function!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3482, __extension__ __PRETTY_FUNCTION__))
;
3483
3484 BasicBlock *InitBB = KernelInitCB->getParent();
3485 BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
3486 KernelInitCB->getNextNode(), "thread.user_code.check");
3487 BasicBlock *StateMachineBeginBB = BasicBlock::Create(
3488 Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
3489 BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
3490 Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
3491 BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
3492 Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
3493 BasicBlock *StateMachineIfCascadeCurrentBB =
3494 BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3495 Kernel, UserCodeEntryBB);
3496 BasicBlock *StateMachineEndParallelBB =
3497 BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
3498 Kernel, UserCodeEntryBB);
3499 BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
3500 Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
3501 A.registerManifestAddedBasicBlock(*InitBB);
3502 A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
3503 A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
3504 A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
3505 A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
3506 A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
3507 A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
3508 A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);
3509
3510 const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
3511 ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
3512
3513 InitBB->getTerminator()->eraseFromParent();
3514 Instruction *IsWorker =
3515 ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
3516 ConstantInt::get(KernelInitCB->getType(), -1),
3517 "thread.is_worker", InitBB);
3518 IsWorker->setDebugLoc(DLoc);
3519 BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
3520
3521 Module &M = *Kernel->getParent();
3522
3523 // Create local storage for the work function pointer.
3524 const DataLayout &DL = M.getDataLayout();
3525 Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
3526 Instruction *WorkFnAI =
3527 new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
3528 "worker.work_fn.addr", &Kernel->getEntryBlock().front());
3529 WorkFnAI->setDebugLoc(DLoc);
3530
3531 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3532 OMPInfoCache.OMPBuilder.updateToLocation(
3533 OpenMPIRBuilder::LocationDescription(
3534 IRBuilder<>::InsertPoint(StateMachineBeginBB,
3535 StateMachineBeginBB->end()),
3536 DLoc));
3537
3538 Value *Ident = KernelInitCB->getArgOperand(0);
3539 Value *GTid = KernelInitCB;
3540
3541 FunctionCallee BarrierFn =
3542 OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3543 M, OMPRTL___kmpc_barrier_simple_generic);
3544 CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
3545 ->setDebugLoc(DLoc);
3546
3547 if (WorkFnAI->getType()->getPointerAddressSpace() !=
3548 (unsigned int)AddressSpace::Generic) {
3549 WorkFnAI = new AddrSpaceCastInst(
3550 WorkFnAI,
3551 PointerType::getWithSamePointeeType(
3552 cast<PointerType>(WorkFnAI->getType()),
3553 (unsigned int)AddressSpace::Generic),
3554 WorkFnAI->getName() + ".generic", StateMachineBeginBB);
3555 WorkFnAI->setDebugLoc(DLoc);
3556 }
3557
3558 FunctionCallee KernelParallelFn =
3559 OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3560 M, OMPRTL___kmpc_kernel_parallel);
3561 Instruction *IsActiveWorker = CallInst::Create(
3562 KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
3563 IsActiveWorker->setDebugLoc(DLoc);
3564 Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
3565 StateMachineBeginBB);
3566 WorkFn->setDebugLoc(DLoc);
3567
3568 FunctionType *ParallelRegionFnTy = FunctionType::get(
3569 Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
3570 false);
3571 Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
3572 WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
3573 StateMachineBeginBB);
3574
3575 Instruction *IsDone =
3576 ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
3577 Constant::getNullValue(VoidPtrTy), "worker.is_done",
3578 StateMachineBeginBB);
3579 IsDone->setDebugLoc(DLoc);
3580 BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
3581 IsDone, StateMachineBeginBB)
3582 ->setDebugLoc(DLoc);
3583
3584 BranchInst::Create(StateMachineIfCascadeCurrentBB,
3585 StateMachineDoneBarrierBB, IsActiveWorker,
3586 StateMachineIsActiveCheckBB)
3587 ->setDebugLoc(DLoc);
3588
3589 Value *ZeroArg =
3590 Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
3591
3592 // Now that we have most of the CFG skeleton it is time for the if-cascade
3593 // that checks the function pointer we got from the runtime against the
3594 // parallel regions we expect, if there are any.
3595 for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
3596 auto *ParallelRegion = ReachedKnownParallelRegions[I];
3597 BasicBlock *PRExecuteBB = BasicBlock::Create(
3598 Ctx, "worker_state_machine.parallel_region.execute", Kernel,
3599 StateMachineEndParallelBB);
3600 CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
3601 ->setDebugLoc(DLoc);
3602 BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
3603 ->setDebugLoc(DLoc);
3604
3605 BasicBlock *PRNextBB =
3606 BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3607 Kernel, StateMachineEndParallelBB);
3608
3609 // Check if we need to compare the pointer at all or if we can just
3610 // call the parallel region function.
3611 Value *IsPR;
3612 if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
3613 Instruction *CmpI = ICmpInst::Create(
3614 ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
3615 "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
3616 CmpI->setDebugLoc(DLoc);
3617 IsPR = CmpI;
3618 } else {
3619 IsPR = ConstantInt::getTrue(Ctx);
3620 }
3621
3622 BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
3623 StateMachineIfCascadeCurrentBB)
3624 ->setDebugLoc(DLoc);
3625 StateMachineIfCascadeCurrentBB = PRNextBB;
3626 }
3627
3628 // At the end of the if-cascade we place the indirect function pointer call
3629 // in case we might need it, that is if there can be parallel regions we
3630 // have not handled in the if-cascade above.
3631 if (!ReachedUnknownParallelRegions.empty()) {
3632 StateMachineIfCascadeCurrentBB->setName(
3633 "worker_state_machine.parallel_region.fallback.execute");
3634 CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
3635 StateMachineIfCascadeCurrentBB)
3636 ->setDebugLoc(DLoc);
3637 }
3638 BranchInst::Create(StateMachineEndParallelBB,
3639 StateMachineIfCascadeCurrentBB)
3640 ->setDebugLoc(DLoc);
3641
3642 CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3643 M, OMPRTL___kmpc_kernel_end_parallel),
3644 {}, "", StateMachineEndParallelBB)
3645 ->setDebugLoc(DLoc);
3646 BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
3647 ->setDebugLoc(DLoc);
3648
3649 CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
3650 ->setDebugLoc(DLoc);
3651 BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
3652 ->setDebugLoc(DLoc);
3653
3654 return ChangeStatus::CHANGED;
3655 }
3656
3657 /// Fixpoint iteration update function. Will be called every time a dependence
3658 /// changed its state (and in the beginning).
3659 ChangeStatus updateImpl(Attributor &A) override {
3660 KernelInfoState StateBefore = getState();
3661
3662 // Callback to check a read/write instruction.
3663 auto CheckRWInst = [&](Instruction &I) {
3664 // We handle calls later.
3665 if (isa<CallBase>(I))
3666 return true;
3667 // We only care about write effects.
3668 if (!I.mayWriteToMemory())
3669 return true;
3670 if (auto *SI = dyn_cast<StoreInst>(&I)) {
3671 SmallVector<const Value *> Objects;
3672 getUnderlyingObjects(SI->getPointerOperand(), Objects);
3673 if (llvm::all_of(Objects,
3674 [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
3675 return true;
3676 // Check for AAHeapToStack moved objects which must not be guarded.
3677 auto &HS = A.getAAFor<AAHeapToStack>(
3678 *this, IRPosition::function(*I.getFunction()),
3679 DepClassTy::OPTIONAL);
3680 if (llvm::all_of(Objects, [&HS](const Value *Obj) {
3681 auto *CB = dyn_cast<CallBase>(Obj);
3682 if (!CB)
3683 return false;
3684 return HS.isAssumedHeapToStack(*CB);
3685 })) {
3686 return true;
3687 }
3688 }
3689
3690 // Insert instruction that needs guarding.
3691 SPMDCompatibilityTracker.insert(&I);
3692 return true;
3693 };
3694
3695 bool UsedAssumedInformationInCheckRWInst = false;
3696 if (!SPMDCompatibilityTracker.isAtFixpoint())
3697 if (!A.checkForAllReadWriteInstructions(
3698 CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
3699 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3700
3701 if (!IsKernelEntry) {
3702 updateReachingKernelEntries(A);
3703 updateParallelLevels(A);
3704
3705 if (!ParallelLevels.isValidState())
3706 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3707 }
3708
3709 // Callback to check a call instruction.
3710 bool AllParallelRegionStatesWereFixed = true;
3711 bool AllSPMDStatesWereFixed = true;
3712 auto CheckCallInst = [&](Instruction &I) {
3713 auto &CB = cast<CallBase>(I);
3714 auto &CBAA = A.getAAFor<AAKernelInfo>(
3715 *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
3716 getState() ^= CBAA.getState();
3717 AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
3718 AllParallelRegionStatesWereFixed &=
3719 CBAA.ReachedKnownParallelRegions.isAtFixpoint();
3720 AllParallelRegionStatesWereFixed &=
3721 CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
3722 return true;
3723 };
3724
3725 bool UsedAssumedInformationInCheckCallInst = false;
3726 if (!A.checkForAllCallLikeInstructions(
3727 CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
3728 LLVM_DEBUG(dbgs() << TAGdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Failed to visit all call-like instructions!\n"
;; } } while (false)
3729 << "Failed to visit all call-like instructions!\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Failed to visit all call-like instructions!\n"
;; } } while (false)
;
3730 return indicatePessimisticFixpoint();
3731 }
3732
3733 // If we haven't used any assumed information for the reached parallel
3734 // region states we can fix it.
3735 if (!UsedAssumedInformationInCheckCallInst &&
3736 AllParallelRegionStatesWereFixed) {
3737 ReachedKnownParallelRegions.indicateOptimisticFixpoint();
3738 ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
3739 }
3740
3741 // If we are sure there are no parallel regions in the kernel we do not
3742 // want SPMD mode.
3743 if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
3744 ReachedKnownParallelRegions.isAtFixpoint() &&
3745 ReachedUnknownParallelRegions.isValidState() &&
3746 ReachedKnownParallelRegions.isValidState() &&
3747 !mayContainParallelRegion())
3748 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3749
3750 // If we haven't used any assumed information for the SPMD state we can fix
3751 // it.
3752 if (!UsedAssumedInformationInCheckRWInst &&
3753 !UsedAssumedInformationInCheckCallInst && AllSPMDStatesWereFixed)
3754 SPMDCompatibilityTracker.indicateOptimisticFixpoint();
3755
3756 return StateBefore == getState() ? ChangeStatus::UNCHANGED
3757 : ChangeStatus::CHANGED;
3758 }
3759
3760private:
3761 /// Update info regarding reaching kernels.
3762 void updateReachingKernelEntries(Attributor &A) {
3763 auto PredCallSite = [&](AbstractCallSite ACS) {
3764 Function *Caller = ACS.getInstruction()->getFunction();
3765
3766 assert(Caller && "Caller is nullptr")(static_cast <bool> (Caller && "Caller is nullptr"
) ? void (0) : __assert_fail ("Caller && \"Caller is nullptr\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3766, __extension__ __PRETTY_FUNCTION__))
;
1
Assuming 'Caller' is non-null
2
'?' condition is true
3767
3768 auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
3769 IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
3770 if (CAA.ReachingKernelEntries.isValidState()) {
3
Calling 'IntegerStateBase::isValidState'
6
Returning from 'IntegerStateBase::isValidState'
7
Taking false branch
3771 ReachingKernelEntries ^= CAA.ReachingKernelEntries;
3772 return true;
3773 }
3774
3775 // We lost track of the caller of the associated function, any kernel
3776 // could reach now.
3777 ReachingKernelEntries.indicatePessimisticFixpoint();
8
Called C++ object pointer is null
3778
3779 return true;
3780 };
3781
3782 bool AllCallSitesKnown;
3783 if (!A.checkForAllCallSites(PredCallSite, *this,
3784 true /* RequireAllCallSites */,
3785 AllCallSitesKnown))
3786 ReachingKernelEntries.indicatePessimisticFixpoint();
3787 }
3788
3789 /// Update info regarding parallel levels.
3790 void updateParallelLevels(Attributor &A) {
3791 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3792 OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
3793 OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
3794
3795 auto PredCallSite = [&](AbstractCallSite ACS) {
3796 Function *Caller = ACS.getInstruction()->getFunction();
3797
3798 assert(Caller && "Caller is nullptr")(static_cast <bool> (Caller && "Caller is nullptr"
) ? void (0) : __assert_fail ("Caller && \"Caller is nullptr\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 3798, __extension__ __PRETTY_FUNCTION__))
;
3799
3800 auto &CAA =
3801 A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
3802 if (CAA.ParallelLevels.isValidState()) {
3803 // Any function that is called by `__kmpc_parallel_51` will not be
3804 // folded as the parallel level in the function is updated. In order to
3805 // get it right, all the analysis would depend on the implentation. That
3806 // said, if in the future any change to the implementation, the analysis
3807 // could be wrong. As a consequence, we are just conservative here.
3808 if (Caller == Parallel51RFI.Declaration) {
3809 ParallelLevels.indicatePessimisticFixpoint();
3810 return true;
3811 }
3812
3813 ParallelLevels ^= CAA.ParallelLevels;
3814
3815 return true;
3816 }
3817
3818 // We lost track of the caller of the associated function, any kernel
3819 // could reach now.
3820 ParallelLevels.indicatePessimisticFixpoint();
3821
3822 return true;
3823 };
3824
3825 bool AllCallSitesKnown = true;
3826 if (!A.checkForAllCallSites(PredCallSite, *this,
3827 true /* RequireAllCallSites */,
3828 AllCallSitesKnown))
3829 ParallelLevels.indicatePessimisticFixpoint();
3830 }
3831};
3832
3833/// The call site kernel info abstract attribute, basically, what can we say
3834/// about a call site with regards to the KernelInfoState. For now this simply
3835/// forwards the information from the callee.
3836struct AAKernelInfoCallSite : AAKernelInfo {
3837 AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
3838 : AAKernelInfo(IRP, A) {}
3839
3840 /// See AbstractAttribute::initialize(...).
3841 void initialize(Attributor &A) override {
3842 AAKernelInfo::initialize(A);
3843
3844 CallBase &CB = cast<CallBase>(getAssociatedValue());
3845 Function *Callee = getAssociatedFunction();
3846
3847 auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
3848 *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
3849
3850 // Check for SPMD-mode assumptions.
3851 if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
3852 SPMDCompatibilityTracker.indicateOptimisticFixpoint();
3853 indicateOptimisticFixpoint();
3854 }
3855
3856 // First weed out calls we do not care about, that is readonly/readnone
3857 // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
3858 // parallel region or anything else we are looking for.
3859 if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
3860 indicateOptimisticFixpoint();
3861 return;
3862 }
3863
3864 // Next we check if we know the callee. If it is a known OpenMP function
3865 // we will handle them explicitly in the switch below. If it is not, we
3866 // will use an AAKernelInfo object on the callee to gather information and
3867 // merge that into the current state. The latter happens in the updateImpl.
3868 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3869 const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
3870 if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
3871 // Unknown caller or declarations are not analyzable, we give up.
3872 if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
3873
3874 // Unknown callees might contain parallel regions, except if they have
3875 // an appropriate assumption attached.
3876 if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
3877 AssumptionAA.hasAssumption("omp_no_parallelism")))
3878 ReachedUnknownParallelRegions.insert(&CB);
3879
3880 // If SPMDCompatibilityTracker is not fixed, we need to give up on the
3881 // idea we can run something unknown in SPMD-mode.
3882 if (!SPMDCompatibilityTracker.isAtFixpoint()) {
3883 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3884 SPMDCompatibilityTracker.insert(&CB);
3885 }
3886
3887 // We have updated the state for this unknown call properly, there won't
3888 // be any change so we indicate a fixpoint.
3889 indicateOptimisticFixpoint();
3890 }
3891 // If the callee is known and can be used in IPO, we will update the state
3892 // based on the callee state in updateImpl.
3893 return;
3894 }
3895
3896 const unsigned int WrapperFunctionArgNo = 6;
3897 RuntimeFunction RF = It->getSecond();
3898 switch (RF) {
3899 // All the functions we know are compatible with SPMD mode.
3900 case OMPRTL___kmpc_is_spmd_exec_mode:
3901 case OMPRTL___kmpc_distribute_static_fini:
3902 case OMPRTL___kmpc_for_static_fini:
3903 case OMPRTL___kmpc_global_thread_num:
3904 case OMPRTL___kmpc_get_hardware_num_threads_in_block:
3905 case OMPRTL___kmpc_get_hardware_num_blocks:
3906 case OMPRTL___kmpc_single:
3907 case OMPRTL___kmpc_end_single:
3908 case OMPRTL___kmpc_master:
3909 case OMPRTL___kmpc_end_master:
3910 case OMPRTL___kmpc_barrier:
3911 break;
3912 case OMPRTL___kmpc_distribute_static_init_4:
3913 case OMPRTL___kmpc_distribute_static_init_4u:
3914 case OMPRTL___kmpc_distribute_static_init_8:
3915 case OMPRTL___kmpc_distribute_static_init_8u:
3916 case OMPRTL___kmpc_for_static_init_4:
3917 case OMPRTL___kmpc_for_static_init_4u:
3918 case OMPRTL___kmpc_for_static_init_8:
3919 case OMPRTL___kmpc_for_static_init_8u: {
3920 // Check the schedule and allow static schedule in SPMD mode.
3921 unsigned ScheduleArgOpNo = 2;
3922 auto *ScheduleTypeCI =
3923 dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
3924 unsigned ScheduleTypeVal =
3925 ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
3926 switch (OMPScheduleType(ScheduleTypeVal)) {
3927 case OMPScheduleType::Static:
3928 case OMPScheduleType::StaticChunked:
3929 case OMPScheduleType::Distribute:
3930 case OMPScheduleType::DistributeChunked:
3931 break;
3932 default:
3933 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3934 SPMDCompatibilityTracker.insert(&CB);
3935 break;
3936 };
3937 } break;
3938 case OMPRTL___kmpc_target_init:
3939 KernelInitCB = &CB;
3940 break;
3941 case OMPRTL___kmpc_target_deinit:
3942 KernelDeinitCB = &CB;
3943 break;
3944 case OMPRTL___kmpc_parallel_51:
3945 if (auto *ParallelRegion = dyn_cast<Function>(
3946 CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
3947 ReachedKnownParallelRegions.insert(ParallelRegion);
3948 break;
3949 }
3950 // The condition above should usually get the parallel region function
3951 // pointer and record it. In the off chance it doesn't we assume the
3952 // worst.
3953 ReachedUnknownParallelRegions.insert(&CB);
3954 break;
3955 case OMPRTL___kmpc_omp_task:
3956 // We do not look into tasks right now, just give up.
3957 SPMDCompatibilityTracker.insert(&CB);
3958 ReachedUnknownParallelRegions.insert(&CB);
3959 break;
3960 case OMPRTL___kmpc_alloc_shared:
3961 case OMPRTL___kmpc_free_shared:
3962 // Return without setting a fixpoint, to be resolved in updateImpl.
3963 return;
3964 default:
3965 // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
3966 // generally. However, they do not hide parallel regions.
3967 SPMDCompatibilityTracker.insert(&CB);
3968 break;
3969 }
3970 // All other OpenMP runtime calls will not reach parallel regions so they
3971 // can be safely ignored for now. Since it is a known OpenMP runtime call we
3972 // have now modeled all effects and there is no need for any update.
3973 indicateOptimisticFixpoint();
3974 }
3975
3976 ChangeStatus updateImpl(Attributor &A) override {
3977 // TODO: Once we have call site specific value information we can provide
3978 // call site specific liveness information and then it makes
3979 // sense to specialize attributes for call sites arguments instead of
3980 // redirecting requests to the callee argument.
3981 Function *F = getAssociatedFunction();
3982
3983 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3984 const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
3985
3986 // If F is not a runtime function, propagate the AAKernelInfo of the callee.
3987 if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
3988 const IRPosition &FnPos = IRPosition::function(*F);
3989 auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
3990 if (getState() == FnAA.getState())
3991 return ChangeStatus::UNCHANGED;
3992 getState() = FnAA.getState();
3993 return ChangeStatus::CHANGED;
3994 }
3995
3996 // F is a runtime function that allocates or frees memory, check
3997 // AAHeapToStack and AAHeapToShared.
3998 KernelInfoState StateBefore = getState();
3999 assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared
|| It->getSecond() == OMPRTL___kmpc_free_shared) &&
"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call"
) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4001, __extension__ __PRETTY_FUNCTION__))
4000 It->getSecond() == OMPRTL___kmpc_free_shared) &&(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared
|| It->getSecond() == OMPRTL___kmpc_free_shared) &&
"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call"
) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4001, __extension__ __PRETTY_FUNCTION__))
4001 "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call")(static_cast <bool> ((It->getSecond() == OMPRTL___kmpc_alloc_shared
|| It->getSecond() == OMPRTL___kmpc_free_shared) &&
"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call"
) ? void (0) : __assert_fail ("(It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && \"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4001, __extension__ __PRETTY_FUNCTION__))
;
4002
4003 CallBase &CB = cast<CallBase>(getAssociatedValue());
4004
4005 auto &HeapToStackAA = A.getAAFor<AAHeapToStack>(
4006 *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
4007 auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>(
4008 *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
4009
4010 RuntimeFunction RF = It->getSecond();
4011
4012 switch (RF) {
4013 // If neither HeapToStack nor HeapToShared assume the call is removed,
4014 // assume SPMD incompatibility.
4015 case OMPRTL___kmpc_alloc_shared:
4016 if (!HeapToStackAA.isAssumedHeapToStack(CB) &&
4017 !HeapToSharedAA.isAssumedHeapToShared(CB))
4018 SPMDCompatibilityTracker.insert(&CB);
4019 break;
4020 case OMPRTL___kmpc_free_shared:
4021 if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) &&
4022 !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB))
4023 SPMDCompatibilityTracker.insert(&CB);
4024 break;
4025 default:
4026 SPMDCompatibilityTracker.insert(&CB);
4027 }
4028
4029 return StateBefore == getState() ? ChangeStatus::UNCHANGED
4030 : ChangeStatus::CHANGED;
4031 }
4032};
4033
4034struct AAFoldRuntimeCall
4035 : public StateWrapper<BooleanState, AbstractAttribute> {
4036 using Base = StateWrapper<BooleanState, AbstractAttribute>;
4037
4038 AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4039
4040 /// Statistics are tracked as part of manifest for now.
4041 void trackStatistics() const override {}
4042
4043 /// Create an abstract attribute biew for the position \p IRP.
4044 static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
4045 Attributor &A);
4046
4047 /// See AbstractAttribute::getName()
4048 const std::string getName() const override { return "AAFoldRuntimeCall"; }
4049
4050 /// See AbstractAttribute::getIdAddr()
4051 const char *getIdAddr() const override { return &ID; }
4052
4053 /// This function should return true if the type of the \p AA is
4054 /// AAFoldRuntimeCall
4055 static bool classof(const AbstractAttribute *AA) {
4056 return (AA->getIdAddr() == &ID);
4057 }
4058
4059 static const char ID;
4060};
4061
4062struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
/// Constructs the attribute for position \p IRP by forwarding both arguments
/// unchanged to the AAFoldRuntimeCall base constructor.
4063 AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
4064 : AAFoldRuntimeCall(IRP, A) {}
4065
4066 /// See AbstractAttribute::getAsStr()
4067 const std::string getAsStr() const override {
4068 if (!isValidState())
4069 return "<invalid>";
4070
4071 std::string Str("simplified value: ");
4072
4073 if (!SimplifiedValue.hasValue())
4074 return Str + std::string("none");
4075
4076 if (!SimplifiedValue.getValue())
4077 return Str + std::string("nullptr");
4078
4079 if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue()))
4080 return Str + std::to_string(CI->getSExtValue());
4081
4082 return Str + std::string("unknown");
4083 }
4084
4085 void initialize(Attributor &A) override {
4086 if (DisableOpenMPOptFolding)
4087 indicatePessimisticFixpoint();
4088
4089 Function *Callee = getAssociatedFunction();
4090
4091 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4092 const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
4093 assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&(static_cast <bool> (It != OMPInfoCache.RuntimeFunctionIDMap
.end() && "Expected a known OpenMP runtime function")
? void (0) : __assert_fail ("It != OMPInfoCache.RuntimeFunctionIDMap.end() && \"Expected a known OpenMP runtime function\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4094, __extension__ __PRETTY_FUNCTION__))
4094 "Expected a known OpenMP runtime function")(static_cast <bool> (It != OMPInfoCache.RuntimeFunctionIDMap
.end() && "Expected a known OpenMP runtime function")
? void (0) : __assert_fail ("It != OMPInfoCache.RuntimeFunctionIDMap.end() && \"Expected a known OpenMP runtime function\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4094, __extension__ __PRETTY_FUNCTION__))
;
4095
4096 RFKind = It->getSecond();
4097
4098 CallBase &CB = cast<CallBase>(getAssociatedValue());
4099 A.registerSimplificationCallback(
4100 IRPosition::callsite_returned(CB),
4101 [&](const IRPosition &IRP, const AbstractAttribute *AA,
4102 bool &UsedAssumedInformation) -> Optional<Value *> {
4103 assert((isValidState() || (SimplifiedValue.hasValue() &&(static_cast <bool> ((isValidState() || (SimplifiedValue
.hasValue() && SimplifiedValue.getValue() == nullptr)
) && "Unexpected invalid state!") ? void (0) : __assert_fail
("(isValidState() || (SimplifiedValue.hasValue() && SimplifiedValue.getValue() == nullptr)) && \"Unexpected invalid state!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4105, __extension__ __PRETTY_FUNCTION__))
4104 SimplifiedValue.getValue() == nullptr)) &&(static_cast <bool> ((isValidState() || (SimplifiedValue
.hasValue() && SimplifiedValue.getValue() == nullptr)
) && "Unexpected invalid state!") ? void (0) : __assert_fail
("(isValidState() || (SimplifiedValue.hasValue() && SimplifiedValue.getValue() == nullptr)) && \"Unexpected invalid state!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4105, __extension__ __PRETTY_FUNCTION__))
4105 "Unexpected invalid state!")(static_cast <bool> ((isValidState() || (SimplifiedValue
.hasValue() && SimplifiedValue.getValue() == nullptr)
) && "Unexpected invalid state!") ? void (0) : __assert_fail
("(isValidState() || (SimplifiedValue.hasValue() && SimplifiedValue.getValue() == nullptr)) && \"Unexpected invalid state!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4105, __extension__ __PRETTY_FUNCTION__))
;
4106
4107 if (!isAtFixpoint()) {
4108 UsedAssumedInformation = true;
4109 if (AA)
4110 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
4111 }
4112 return SimplifiedValue;
4113 });
4114 }
4115
4116 ChangeStatus updateImpl(Attributor &A) override {
4117 ChangeStatus Changed = ChangeStatus::UNCHANGED;
4118 switch (RFKind) {
4119 case OMPRTL___kmpc_is_spmd_exec_mode:
4120 Changed |= foldIsSPMDExecMode(A);
4121 break;
4122 case OMPRTL___kmpc_is_generic_main_thread_id:
4123 Changed |= foldIsGenericMainThread(A);
4124 break;
4125 case OMPRTL___kmpc_parallel_level:
4126 Changed |= foldParallelLevel(A);
4127 break;
4128 case OMPRTL___kmpc_get_hardware_num_threads_in_block:
4129 Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
4130 break;
4131 case OMPRTL___kmpc_get_hardware_num_blocks:
4132 Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
4133 break;
4134 default:
4135 llvm_unreachable("Unhandled OpenMP runtime function!")::llvm::llvm_unreachable_internal("Unhandled OpenMP runtime function!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4135)
;
4136 }
4137
4138 return Changed;
4139 }
4140
4141 ChangeStatus manifest(Attributor &A) override {
4142 ChangeStatus Changed = ChangeStatus::UNCHANGED;
4143
4144 if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
4145 Instruction &I = *getCtxI();
4146 A.changeValueAfterManifest(I, **SimplifiedValue);
4147 A.deleteAfterManifest(I);
4148
4149 CallBase *CB = dyn_cast<CallBase>(&I);
4150 auto Remark = [&](OptimizationRemark OR) {
4151 if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
4152 return OR << "Replacing OpenMP runtime call "
4153 << CB->getCalledFunction()->getName() << " with "
4154 << ore::NV("FoldedValue", C->getZExtValue()) << ".";
4155 return OR << "Replacing OpenMP runtime call "
4156 << CB->getCalledFunction()->getName() << ".";
4157 };
4158
4159 if (CB && EnableVerboseRemarks)
4160 A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
4161
4162 LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Replacing runtime call: "
<< I << " with " << **SimplifiedValue <<
"\n"; } } while (false)
4163 << **SimplifiedValue << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Replacing runtime call: "
<< I << " with " << **SimplifiedValue <<
"\n"; } } while (false)
;
4164
4165 Changed = ChangeStatus::CHANGED;
4166 }
4167
4168 return Changed;
4169 }
4170
/// Gives up on folding: sets SimplifiedValue to a null value and then
/// delegates to AAFoldRuntimeCall::indicatePessimisticFixpoint(), whose
/// result is returned.
4171 ChangeStatus indicatePessimisticFixpoint() override {
// A set-but-null value is what manifest() skips and getAsStr() prints as
// "nullptr".
4172 SimplifiedValue = nullptr;
4173 return AAFoldRuntimeCall::indicatePessimisticFixpoint();
4174 }
4175
4176private:
4177 /// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
4178 ChangeStatus foldIsSPMDExecMode(Attributor &A) {
4179 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4180
4181 unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
4182 unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
4183 auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
4184 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
4185
4186 if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
4187 return indicatePessimisticFixpoint();
4188
4189 for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
4190 auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
4191 DepClassTy::REQUIRED);
4192
4193 if (!AA.isValidState()) {
4194 SimplifiedValue = nullptr;
4195 return indicatePessimisticFixpoint();
4196 }
4197
4198 if (AA.SPMDCompatibilityTracker.isAssumed()) {
4199 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4200 ++KnownSPMDCount;
4201 else
4202 ++AssumedSPMDCount;
4203 } else {
4204 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4205 ++KnownNonSPMDCount;
4206 else
4207 ++AssumedNonSPMDCount;
4208 }
4209 }
4210
4211 if ((AssumedSPMDCount + KnownSPMDCount) &&
4212 (AssumedNonSPMDCount + KnownNonSPMDCount))
4213 return indicatePessimisticFixpoint();
4214
4215 auto &Ctx = getAnchorValue().getContext();
4216 if (KnownSPMDCount || AssumedSPMDCount) {
4217 assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount
== 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail
("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4218, __extension__ __PRETTY_FUNCTION__))
4218 "Expected only SPMD kernels!")(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount
== 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail
("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4218, __extension__ __PRETTY_FUNCTION__))
;
4219 // All reaching kernels are in SPMD mode. Update all function calls to
4220 // __kmpc_is_spmd_exec_mode to 1.
4221 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
4222 } else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
4223 assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount
== 0 && "Expected only non-SPMD kernels!") ? void (0
) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4224, __extension__ __PRETTY_FUNCTION__))
4224 "Expected only non-SPMD kernels!")(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount
== 0 && "Expected only non-SPMD kernels!") ? void (0
) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4224, __extension__ __PRETTY_FUNCTION__))
;
4225 // All reaching kernels are in non-SPMD mode. Update all function
4226 // calls to __kmpc_is_spmd_exec_mode to 0.
4227 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
4228 } else {
4229 // We have empty reaching kernels, therefore we cannot tell if the
4230 // associated call site can be folded. At this moment, SimplifiedValue
4231 // must be none.
4232 assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none")(static_cast <bool> (!SimplifiedValue.hasValue() &&
"SimplifiedValue should be none") ? void (0) : __assert_fail
("!SimplifiedValue.hasValue() && \"SimplifiedValue should be none\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4232, __extension__ __PRETTY_FUNCTION__))
;
4233 }
4234
4235 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4236 : ChangeStatus::CHANGED;
4237 }
4238
4239 /// Fold __kmpc_is_generic_main_thread_id into a constant if possible.
4240 ChangeStatus foldIsGenericMainThread(Attributor &A) {
4241 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4242
4243 CallBase &CB = cast<CallBase>(getAssociatedValue());
4244 Function *F = CB.getFunction();
4245 const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
4246 *this, IRPosition::function(*F), DepClassTy::REQUIRED);
4247
4248 if (!ExecutionDomainAA.isValidState())
4249 return indicatePessimisticFixpoint();
4250
4251 auto &Ctx = getAnchorValue().getContext();
4252 if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB))
4253 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
4254 else
4255 return indicatePessimisticFixpoint();
4256
4257 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4258 : ChangeStatus::CHANGED;
4259 }
4260
4261 /// Fold __kmpc_parallel_level into a constant if possible.
4262 ChangeStatus foldParallelLevel(Attributor &A) {
4263 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4264
4265 auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
4266 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
4267
4268 if (!CallerKernelInfoAA.ParallelLevels.isValidState())
4269 return indicatePessimisticFixpoint();
4270
4271 if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
4272 return indicatePessimisticFixpoint();
4273
4274 if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
4275 assert(!SimplifiedValue.hasValue() &&(static_cast <bool> (!SimplifiedValue.hasValue() &&
"SimplifiedValue should keep none at this point") ? void (0)
: __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should keep none at this point\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4276, __extension__ __PRETTY_FUNCTION__))
4276 "SimplifiedValue should keep none at this point")(static_cast <bool> (!SimplifiedValue.hasValue() &&
"SimplifiedValue should keep none at this point") ? void (0)
: __assert_fail ("!SimplifiedValue.hasValue() && \"SimplifiedValue should keep none at this point\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4276, __extension__ __PRETTY_FUNCTION__))
;
4277 return ChangeStatus::UNCHANGED;
4278 }
4279
4280 unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
4281 unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
4282 for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
4283 auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
4284 DepClassTy::REQUIRED);
4285 if (!AA.SPMDCompatibilityTracker.isValidState())
4286 return indicatePessimisticFixpoint();
4287
4288 if (AA.SPMDCompatibilityTracker.isAssumed()) {
4289 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4290 ++KnownSPMDCount;
4291 else
4292 ++AssumedSPMDCount;
4293 } else {
4294 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
4295 ++KnownNonSPMDCount;
4296 else
4297 ++AssumedNonSPMDCount;
4298 }
4299 }
4300
4301 if ((AssumedSPMDCount + KnownSPMDCount) &&
4302 (AssumedNonSPMDCount + KnownNonSPMDCount))
4303 return indicatePessimisticFixpoint();
4304
4305 auto &Ctx = getAnchorValue().getContext();
4306 // If the caller can only be reached by SPMD kernel entries, the parallel
4307 // level is 1. Similarly, if the caller can only be reached by non-SPMD
4308 // kernel entries, it is 0.
4309 if (AssumedSPMDCount || KnownSPMDCount) {
4310 assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount
== 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail
("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4311, __extension__ __PRETTY_FUNCTION__))
4311 "Expected only SPMD kernels!")(static_cast <bool> (KnownNonSPMDCount == 0 && AssumedNonSPMDCount
== 0 && "Expected only SPMD kernels!") ? void (0) : __assert_fail
("KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && \"Expected only SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4311, __extension__ __PRETTY_FUNCTION__))
;
4312 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
4313 } else {
4314 assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount
== 0 && "Expected only non-SPMD kernels!") ? void (0
) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4315, __extension__ __PRETTY_FUNCTION__))
4315 "Expected only non-SPMD kernels!")(static_cast <bool> (KnownSPMDCount == 0 && AssumedSPMDCount
== 0 && "Expected only non-SPMD kernels!") ? void (0
) : __assert_fail ("KnownSPMDCount == 0 && AssumedSPMDCount == 0 && \"Expected only non-SPMD kernels!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4315, __extension__ __PRETTY_FUNCTION__))
;
4316 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
4317 }
4318 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4319 : ChangeStatus::CHANGED;
4320 }
4321
4322 ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
4323 // Specialize only if all the calls agree with the attribute constant value
4324 int32_t CurrentAttrValue = -1;
4325 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
4326
4327 auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
4328 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
4329
4330 if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
4331 return indicatePessimisticFixpoint();
4332
4333 // Iterate over the kernels that reach this function
4334 for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
4335 int32_t NextAttrVal = -1;
4336 if (K->hasFnAttribute(Attr))
4337 NextAttrVal =
4338 std::stoi(K->getFnAttribute(Attr).getValueAsString().str());
4339
4340 if (NextAttrVal == -1 ||
4341 (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
4342 return indicatePessimisticFixpoint();
4343 CurrentAttrValue = NextAttrVal;
4344 }
4345
4346 if (CurrentAttrValue != -1) {
4347 auto &Ctx = getAnchorValue().getContext();
4348 SimplifiedValue =
4349 ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
4350 }
4351 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
4352 : ChangeStatus::CHANGED;
4353 }
4354
4355 /// An optional value the associated value is assumed to fold to. That is, we
4356 /// assume the associated value (which is a call) can be replaced by this
4357 /// simplified value.
4358 Optional<Value *> SimplifiedValue;
4359
4360 /// The runtime function kind of the callee of the associated call site.
4361 RuntimeFunction RFKind;
4362};
4363
4364} // namespace
4365
4366/// Register folding callsite
4367void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
4368 auto &RFI = OMPInfoCache.RFIs[RF];
4369 RFI.foreachUse(SCC, [&](Use &U, Function &F) {
4370 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
4371 if (!CI)
4372 return false;
4373 A.getOrCreateAAFor<AAFoldRuntimeCall>(
4374 IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
4375 DepClassTy::NONE, /* ForceUpdate */ false,
4376 /* UpdateAfterInit */ false);
4377 return false;
4378 });
4379}
4380
4381void OpenMPOpt::registerAAs(bool IsModulePass) {
4382 if (SCC.empty())
4383
4384 return;
4385 if (IsModulePass) {
4386 // Ensure we create the AAKernelInfo AAs first and without triggering an
4387 // update. This will make sure we register all value simplification
4388 // callbacks before any other AA has the chance to create an AAValueSimplify
4389 // or similar.
4390 for (Function *Kernel : OMPInfoCache.Kernels)
4391 A.getOrCreateAAFor<AAKernelInfo>(
4392 IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
4393 DepClassTy::NONE, /* ForceUpdate */ false,
4394 /* UpdateAfterInit */ false);
4395
4396 registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
4397 registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
4398 registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
4399 registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
4400 registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
4401 }
4402
4403 // Create CallSite AA for all Getters.
4404 for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
4405 auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
4406
4407 auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
4408
4409 auto CreateAA = [&](Use &U, Function &Caller) {
4410 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
4411 if (!CI)
4412 return false;
4413
4414 auto &CB = cast<CallBase>(*CI);
4415
4416 IRPosition CBPos = IRPosition::callsite_function(CB);
4417 A.getOrCreateAAFor<AAICVTracker>(CBPos);
4418 return false;
4419 };
4420
4421 GetterRFI.foreachUse(SCC, CreateAA);
4422 }
4423 auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
4424 auto CreateAA = [&](Use &U, Function &F) {
4425 A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
4426 return false;
4427 };
4428 if (!DisableOpenMPOptDeglobalization)
4429 GlobalizationRFI.foreachUse(SCC, CreateAA);
4430
4431 // Create an ExecutionDomain AA for every function and a HeapToStack AA for
4432 // every function if there is a device kernel.
4433 if (!isOpenMPDevice(M))
4434 return;
4435
4436 for (auto *F : SCC) {
4437 if (F->isDeclaration())
4438 continue;
4439
4440 A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
4441 if (!DisableOpenMPOptDeglobalization)
4442 A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
4443
4444 for (auto &I : instructions(*F)) {
4445 if (auto *LI = dyn_cast<LoadInst>(&I)) {
4446 bool UsedAssumedInformation = false;
4447 A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
4448 UsedAssumedInformation);
4449 }
4450 }
4451 }
4452}
4453
4454const char AAICVTracker::ID = 0;
4455const char AAKernelInfo::ID = 0;
4456const char AAExecutionDomain::ID = 0;
4457const char AAHeapToShared::ID = 0;
4458const char AAFoldRuntimeCall::ID = 0;
4459
4460AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
4461 Attributor &A) {
4462 AAICVTracker *AA = nullptr;
4463 switch (IRP.getPositionKind()) {
4464 case IRPosition::IRP_INVALID:
4465 case IRPosition::IRP_FLOAT:
4466 case IRPosition::IRP_ARGUMENT:
4467 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4468 llvm_unreachable("ICVTracker can only be created for function position!")::llvm::llvm_unreachable_internal("ICVTracker can only be created for function position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4468)
;
4469 case IRPosition::IRP_RETURNED:
4470 AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
4471 break;
4472 case IRPosition::IRP_CALL_SITE_RETURNED:
4473 AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
4474 break;
4475 case IRPosition::IRP_CALL_SITE:
4476 AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
4477 break;
4478 case IRPosition::IRP_FUNCTION:
4479 AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
4480 break;
4481 }
4482
4483 return *AA;
4484}
4485
4486AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
4487 Attributor &A) {
4488 AAExecutionDomainFunction *AA = nullptr;
4489 switch (IRP.getPositionKind()) {
4490 case IRPosition::IRP_INVALID:
4491 case IRPosition::IRP_FLOAT:
4492 case IRPosition::IRP_ARGUMENT:
4493 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4494 case IRPosition::IRP_RETURNED:
4495 case IRPosition::IRP_CALL_SITE_RETURNED:
4496 case IRPosition::IRP_CALL_SITE:
4497 llvm_unreachable(::llvm::llvm_unreachable_internal("AAExecutionDomain can only be created for function position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4498)
4498 "AAExecutionDomain can only be created for function position!")::llvm::llvm_unreachable_internal("AAExecutionDomain can only be created for function position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4498)
;
4499 case IRPosition::IRP_FUNCTION:
4500 AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
4501 break;
4502 }
4503
4504 return *AA;
4505}
4506
4507AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
4508 Attributor &A) {
4509 AAHeapToSharedFunction *AA = nullptr;
4510 switch (IRP.getPositionKind()) {
4511 case IRPosition::IRP_INVALID:
4512 case IRPosition::IRP_FLOAT:
4513 case IRPosition::IRP_ARGUMENT:
4514 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4515 case IRPosition::IRP_RETURNED:
4516 case IRPosition::IRP_CALL_SITE_RETURNED:
4517 case IRPosition::IRP_CALL_SITE:
4518 llvm_unreachable(::llvm::llvm_unreachable_internal("AAHeapToShared can only be created for function position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4519)
4519 "AAHeapToShared can only be created for function position!")::llvm::llvm_unreachable_internal("AAHeapToShared can only be created for function position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4519)
;
4520 case IRPosition::IRP_FUNCTION:
4521 AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
4522 break;
4523 }
4524
4525 return *AA;
4526}
4527
4528AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
4529 Attributor &A) {
4530 AAKernelInfo *AA = nullptr;
4531 switch (IRP.getPositionKind()) {
4532 case IRPosition::IRP_INVALID:
4533 case IRPosition::IRP_FLOAT:
4534 case IRPosition::IRP_ARGUMENT:
4535 case IRPosition::IRP_RETURNED:
4536 case IRPosition::IRP_CALL_SITE_RETURNED:
4537 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4538 llvm_unreachable("KernelInfo can only be created for function position!")::llvm::llvm_unreachable_internal("KernelInfo can only be created for function position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4538)
;
4539 case IRPosition::IRP_CALL_SITE:
4540 AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
4541 break;
4542 case IRPosition::IRP_FUNCTION:
4543 AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
4544 break;
4545 }
4546
4547 return *AA;
4548}
4549
4550AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
4551 Attributor &A) {
4552 AAFoldRuntimeCall *AA = nullptr;
4553 switch (IRP.getPositionKind()) {
4554 case IRPosition::IRP_INVALID:
4555 case IRPosition::IRP_FLOAT:
4556 case IRPosition::IRP_ARGUMENT:
4557 case IRPosition::IRP_RETURNED:
4558 case IRPosition::IRP_FUNCTION:
4559 case IRPosition::IRP_CALL_SITE:
4560 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4561 llvm_unreachable("KernelInfo can only be created for call site position!")::llvm::llvm_unreachable_internal("KernelInfo can only be created for call site position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp"
, 4561)
;
4562 case IRPosition::IRP_CALL_SITE_RETURNED:
4563 AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
4564 break;
4565 }
4566
4567 return *AA;
4568}
4569
4570PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
4571 if (!containsOpenMP(M))
4572 return PreservedAnalyses::all();
4573 if (DisableOpenMPOptimizations)
4574 return PreservedAnalyses::all();
4575
4576 FunctionAnalysisManager &FAM =
4577 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
4578 KernelSet Kernels = getDeviceKernels(M);
4579
4580 auto IsCalled = [&](Function &F) {
4581 if (Kernels.contains(&F))
4582 return true;
4583 for (const User *U : F.users())
4584 if (!isa<BlockAddress>(U))
4585 return true;
4586 return false;
4587 };
4588
4589 auto EmitRemark = [&](Function &F) {
4590 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4591 ORE.emit([&]() {
4592 OptimizationRemarkAnalysis ORA(DEBUG_TYPE"openmp-opt", "OMP140", &F);
4593 return ORA << "Could not internalize function. "
4594 << "Some optimizations may not be possible. [OMP140]";
4595 });
4596 };
4597
4598 // Create internal copies of each function if this is a kernel Module. This
4599 // allows iterprocedural passes to see every call edge.
4600 DenseMap<Function *, Function *> InternalizedMap;
4601 if (isOpenMPDevice(M)) {
4602 SmallPtrSet<Function *, 16> InternalizeFns;
4603 for (Function &F : M)
4604 if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
4605 !DisableInternalization) {
4606 if (Attributor::isInternalizable(F)) {
4607 InternalizeFns.insert(&F);
4608 } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
4609 EmitRemark(F);
4610 }
4611 }
4612
4613 Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
4614 }
4615
4616 // Look at every function in the Module unless it was internalized.
4617 SmallVector<Function *, 16> SCC;
4618 for (Function &F : M)
4619 if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
4620 SCC.push_back(&F);
4621
4622 if (SCC.empty())
4623 return PreservedAnalyses::all();
4624
4625 AnalysisGetter AG(FAM);
4626
4627 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
4628 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
4629 };
4630
4631 BumpPtrAllocator Allocator;
4632 CallGraphUpdater CGUpdater;
4633
4634 SetVector<Function *> Functions(SCC.begin(), SCC.end());
4635 OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
4636
4637 unsigned MaxFixpointIterations =
4638 (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
4639 Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
4640 MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");
4641
4642 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
4643 bool Changed = OMPOpt.run(true);
4644
4645 // Optionally inline device functions for potentially better performance.
4646 if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
4647 for (Function &F : M)
4648 if (!F.isDeclaration() && !Kernels.contains(&F) &&
4649 !F.hasFnAttribute(Attribute::NoInline))
4650 F.addFnAttr(Attribute::AlwaysInline);
4651
4652 if (PrintModuleAfterOptimizations)
4653 LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n"
<< M; } } while (false)
;
4654
4655 if (Changed)
4656 return PreservedAnalyses::none();
4657
4658 return PreservedAnalyses::all();
4659}
4660
4661PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
4662 CGSCCAnalysisManager &AM,
4663 LazyCallGraph &CG,
4664 CGSCCUpdateResult &UR) {
4665 if (!containsOpenMP(*C.begin()->getFunction().getParent()))
4666 return PreservedAnalyses::all();
4667 if (DisableOpenMPOptimizations)
4668 return PreservedAnalyses::all();
4669
4670 SmallVector<Function *, 16> SCC;
4671 // If there are kernels in the module, we have to run on all SCC's.
4672 for (LazyCallGraph::Node &N : C) {
4673 Function *Fn = &N.getFunction();
4674 SCC.push_back(Fn);
4675 }
4676
4677 if (SCC.empty())
4678 return PreservedAnalyses::all();
4679
4680 Module &M = *C.begin()->getFunction().getParent();
4681
4682 KernelSet Kernels = getDeviceKernels(M);
4683
4684 FunctionAnalysisManager &FAM =
4685 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
4686
4687 AnalysisGetter AG(FAM);
4688
4689 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
4690 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
4691 };
4692
4693 BumpPtrAllocator Allocator;
4694 CallGraphUpdater CGUpdater;
4695 CGUpdater.initialize(CG, C, AM, UR);
4696
4697 SetVector<Function *> Functions(SCC.begin(), SCC.end());
4698 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
4699 /*CGSCC*/ Functions, Kernels);
4700
4701 unsigned MaxFixpointIterations =
4702 (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
4703 Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
4704 MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");
4705
4706 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
4707 bool Changed = OMPOpt.run(false);
4708
4709 if (PrintModuleAfterOptimizations)
4710 LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n"
<< M; } } while (false)
;
4711
4712 if (Changed)
4713 return PreservedAnalyses::none();
4714
4715 return PreservedAnalyses::all();
4716}
4717
4718namespace {
4719
4720struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
4721 CallGraphUpdater CGUpdater;
4722 static char ID;
4723
4724 OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
4725 initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
4726 }
4727
4728 void getAnalysisUsage(AnalysisUsage &AU) const override {
4729 CallGraphSCCPass::getAnalysisUsage(AU);
4730 }
4731
4732 bool runOnSCC(CallGraphSCC &CGSCC) override {
4733 if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
4734 return false;
4735 if (DisableOpenMPOptimizations || skipSCC(CGSCC))
4736 return false;
4737
4738 SmallVector<Function *, 16> SCC;
4739 // If there are kernels in the module, we have to run on all SCC's.
4740 for (CallGraphNode *CGN : CGSCC) {
4741 Function *Fn = CGN->getFunction();
4742 if (!Fn || Fn->isDeclaration())
4743 continue;
4744 SCC.push_back(Fn);
4745 }
4746
4747 if (SCC.empty())
4748 return false;
4749
4750 Module &M = CGSCC.getCallGraph().getModule();
4751 KernelSet Kernels = getDeviceKernels(M);
4752
4753 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
4754 CGUpdater.initialize(CG, CGSCC);
4755
4756 // Maintain a map of functions to avoid rebuilding the ORE
4757 DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
4758 auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
4759 std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
4760 if (!ORE)
4761 ORE = std::make_unique<OptimizationRemarkEmitter>(F);
4762 return *ORE;
4763 };
4764
4765 AnalysisGetter AG;
4766 SetVector<Function *> Functions(SCC.begin(), SCC.end());
4767 BumpPtrAllocator Allocator;
4768 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
4769 Allocator,
4770 /*CGSCC*/ Functions, Kernels);
4771
4772 unsigned MaxFixpointIterations =
4773 (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
4774 Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
4775 MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");
4776
4777 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
4778 bool Result = OMPOpt.run(false);
4779
4780 if (PrintModuleAfterOptimizations)
4781 LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("openmp-opt")) { dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n"
<< M; } } while (false)
;
4782
4783 return Result;
4784 }
4785
4786 bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
4787};
4788
4789} // end anonymous namespace
4790
4791KernelSet llvm::omp::getDeviceKernels(Module &M) {
4792 // TODO: Create a more cross-platform way of determining device kernels.
4793 NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
4794 KernelSet Kernels;
4795
4796 if (!MD)
4797 return Kernels;
4798
4799 for (auto *Op : MD->operands()) {
4800 if (Op->getNumOperands() < 2)
4801 continue;
4802 MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
4803 if (!KindID || KindID->getString() != "kernel")
4804 continue;
4805
4806 Function *KernelFn =
4807 mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
4808 if (!KernelFn)
4809 continue;
4810
4811 ++NumOpenMPTargetRegionKernels;
4812
4813 Kernels.insert(KernelFn);
4814 }
4815
4816 return Kernels;
4817}
4818
4819bool llvm::omp::containsOpenMP(Module &M) {
4820 Metadata *MD = M.getModuleFlag("openmp");
4821 if (!MD)
4822 return false;
4823
4824 return true;
4825}
4826
4827bool llvm::omp::isOpenMPDevice(Module &M) {
4828 Metadata *MD = M.getModuleFlag("openmp-device");
4829 if (!MD)
4830 return false;
4831
4832 return true;
4833}
4834
4835char OpenMPOptCGSCCLegacyPass::ID = 0;
4836
4837INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry
&Registry) {
4838 "OpenMP specific optimizations", false, false)static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry
&Registry) {
4839INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)initializeCallGraphWrapperPassPass(Registry);
4840INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",PassInfo *PI = new PassInfo( "OpenMP specific optimizations",
"openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag
; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag
, initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry
)); }
4841 "OpenMP specific optimizations", false, false)PassInfo *PI = new PassInfo( "OpenMP specific optimizations",
"openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag
; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag
, initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry
)); }
4842
4843Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
4844 return new OpenMPOptCGSCCLegacyPass();
4845}

/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h

1//===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Attributor: An inter procedural (abstract) "attribute" deduction framework.
10//
11// The Attributor framework is an inter procedural abstract analysis (fixpoint
12// iteration analysis). The goal is to allow easy deduction of new attributes as
13// well as information exchange between abstract attributes in-flight.
14//
15// The Attributor class is the driver and the link between the various abstract
16// attributes. The Attributor will iterate until a fixpoint state is reached by
17// all abstract attributes in-flight, or until it will enforce a pessimistic fix
18// point because an iteration limit is reached.
19//
20// Abstract attributes, derived from the AbstractAttribute class, actually
21// describe properties of the code. They can correspond to actual LLVM-IR
22// attributes, or they can be more general, ultimately unrelated to LLVM-IR
23// attributes. The latter is useful when an abstract attributes provides
24// information to other abstract attributes in-flight but we might not want to
25// manifest the information. The Attributor allows to query in-flight abstract
26// attributes through the `Attributor::getAAFor` method (see the method
27// description for an example). If the method is used by an abstract attribute
28// P, and it results in an abstract attribute Q, the Attributor will
29// automatically capture a potential dependence from Q to P. This dependence
30// will cause P to be reevaluated whenever Q changes in the future.
31//
32// The Attributor will only reevaluate abstract attributes that might have
33// changed since the last iteration. That means that the Attribute will not
34// revisit all instructions/blocks/functions in the module but only query
35// an update from a subset of the abstract attributes.
36//
37// The update method `AbstractAttribute::updateImpl` is implemented by the
38// specific "abstract attribute" subclasses. The method is invoked whenever the
39// currently assumed state (see the AbstractState class) might not be valid
40// anymore. This can, for example, happen if the state was dependent on another
41// abstract attribute that changed. In every invocation, the update method has
42// to adjust the internal state of an abstract attribute to a point that is
43// justifiable by the underlying IR and the current state of abstract attributes
44// in-flight. Since the IR is given and assumed to be valid, the information
45// derived from it can be assumed to hold. However, information derived from
46// other abstract attributes is conditional on various things. If the justifying
47// state changed, the `updateImpl` has to revisit the situation and potentially
48// find another justification or limit the optimistic assumptions made.
49//
50// Change is the key in this framework. Until a state of no-change, thus a
51// fixpoint, is reached, the Attributor will query the abstract attributes
52// in-flight to re-evaluate their state. If the (current) state is too
53// optimistic, hence it cannot be justified anymore through other abstract
54// attributes or the state of the IR, the state of the abstract attribute will
55// have to change. Generally, we assume abstract attribute state to be a finite
56// height lattice and the update function to be monotone. However, these
57// conditions are not enforced because the iteration limit will guarantee
58// termination. If an optimistic fixpoint is reached, or a pessimistic fix
59// point is enforced after a timeout, the abstract attributes are tasked to
60// manifest their result in the IR for passes to come.
61//
62// Attribute manifestation is not mandatory. If desired, there is support to
63// generate a single or multiple LLVM-IR attributes already in the helper struct
64// IRAttribute. In the simplest case, a subclass inherits from IRAttribute with
65// a proper Attribute::AttrKind as template parameter. The Attributor
66// manifestation framework will then create and place a new attribute if it is
67// allowed to do so (based on the abstract state). Other use cases can be
68// achieved by overloading AbstractAttribute or IRAttribute methods.
69//
70//
71// The "mechanics" of adding a new "abstract attribute":
72// - Define a class (transitively) inheriting from AbstractAttribute and one
73// (which could be the same) that (transitively) inherits from AbstractState.
74// For the latter, consider the already available BooleanState and
75// {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., you require only a
76// number tracking or bit-encoding.
77// - Implement all pure methods. Also use overloading if the attribute is not
78// conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
79// an argument, call site argument, function return value, or function. See
80// the class and method descriptions for more information on the two
81// "Abstract" classes and their respective methods.
82// - Register opportunities for the new abstract attribute in the
83// `Attributor::identifyDefaultAbstractAttributes` method if it should be
84// counted as a 'default' attribute.
85// - Add sufficient tests.
86// - Add a Statistics object for bookkeeping. If it is a simple (set of)
87// attribute(s) manifested through the Attributor manifestation framework, see
88// the bookkeeping function in Attributor.cpp.
89// - If instructions with a certain opcode are interesting to the attribute, add
90// that opcode to the switch in `Attributor::identifyAbstractAttributes`. This
91// will make it possible to query all those instructions through the
92// `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the
93// need to traverse the IR repeatedly.
94//
95//===----------------------------------------------------------------------===//
96
97#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
98#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
99
100#include "llvm/ADT/DenseSet.h"
101#include "llvm/ADT/GraphTraits.h"
102#include "llvm/ADT/MapVector.h"
103#include "llvm/ADT/STLExtras.h"
104#include "llvm/ADT/SetOperations.h"
105#include "llvm/ADT/SetVector.h"
106#include "llvm/ADT/Triple.h"
107#include "llvm/ADT/iterator.h"
108#include "llvm/Analysis/AssumeBundleQueries.h"
109#include "llvm/Analysis/CFG.h"
110#include "llvm/Analysis/CGSCCPassManager.h"
111#include "llvm/Analysis/LazyCallGraph.h"
112#include "llvm/Analysis/LoopInfo.h"
113#include "llvm/Analysis/MustExecute.h"
114#include "llvm/Analysis/OptimizationRemarkEmitter.h"
115#include "llvm/Analysis/PostDominators.h"
116#include "llvm/Analysis/TargetLibraryInfo.h"
117#include "llvm/IR/AbstractCallSite.h"
118#include "llvm/IR/ConstantRange.h"
119#include "llvm/IR/PassManager.h"
120#include "llvm/Support/Allocator.h"
121#include "llvm/Support/Casting.h"
122#include "llvm/Support/GraphWriter.h"
123#include "llvm/Support/TimeProfiler.h"
124#include "llvm/Transforms/Utils/CallGraphUpdater.h"
125
126namespace llvm {
127
128struct AADepGraphNode;
129struct AADepGraph;
130struct Attributor;
131struct AbstractAttribute;
132struct InformationCache;
133struct AAIsDead;
134struct AttributorCallGraph;
135
136class AAManager;
137class AAResults;
138class Function;
139
140/// Abstract Attribute helper functions.
141namespace AA {
142
143/// Return true if \p V is dynamically unique, that is, there are no two
144/// "instances" of \p V at runtime with different values.
145bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
146 const Value &V);
147
148/// Return true if \p V is a valid value in \p Scope, that is a constant or an
149/// instruction/argument of \p Scope.
150bool isValidInScope(const Value &V, const Function *Scope);
151
152/// Return true if \p V is a valid value at position \p CtxI, that is a
153/// constant, an argument of the same function as \p CtxI, or an instruction in
154/// that function that dominates \p CtxI.
155bool isValidAtPosition(const Value &V, const Instruction &CtxI,
156 InformationCache &InfoCache);
157
158/// Try to convert \p V to type \p Ty without introducing new instructions. If
159/// this is not possible return `nullptr`. Note: this function basically knows
160/// how to cast various constants.
161Value *getWithType(Value &V, Type &Ty);
162
163/// Return the combination of \p A and \p B such that the result is a possible
164/// value of both. \p B is potentially casted to match the type \p Ty or the
165/// type of \p A if \p Ty is null.
166///
167/// Examples:
168/// X + none => X
169/// not_none + undef => not_none
170/// V1 + V2 => nullptr
171Optional<Value *>
172combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
173 const Optional<Value *> &B, Type *Ty);
174
175/// Return the initial value of \p Obj with type \p Ty if that is a constant.
176Constant *getInitialValueForObj(Value &Obj, Type &Ty);
177
178/// Collect all potential underlying objects of \p Ptr at position \p CtxI in
179/// \p Objects. Assumed information is used and dependences onto \p QueryingAA
180/// are added appropriately.
181///
182/// \returns True if \p Objects contains all assumed underlying objects, and
183/// false if something went wrong and the objects could not be
184/// determined.
185bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
186 SmallVectorImpl<Value *> &Objects,
187 const AbstractAttribute &QueryingAA,
188 const Instruction *CtxI);
189
190/// Collect all potential values of the one stored by \p SI into
191/// \p PotentialCopies. That is, the only copies that were made via the
192/// store are assumed to be known and all in \p PotentialCopies. Dependences
193/// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
194/// inform the caller if assumed information was used.
195///
196/// \returns True if the assumed potential copies are all in \p PotentialCopies,
197/// false if something went wrong and the copies could not be
198/// determined.
199bool getPotentialCopiesOfStoredValue(
200 Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
201 const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
202
203} // namespace AA
204
205/// The value passed to the line option that defines the maximal initialization
206/// chain length.
207extern unsigned MaxInitializationChainLength;
208
209///{
/// Two-valued change indicator.
///
/// The combining operators below are only declared in this header; their
/// definitions (and thus the exact combination rules) live elsewhere.
enum class ChangeStatus {
  CHANGED,
  UNCHANGED,
};

ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
ChangeStatus &operator|=(ChangeStatus &l, ChangeStatus r);
ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
ChangeStatus &operator&=(ChangeStatus &l, ChangeStatus r);
219
/// Classification of a dependence between a source and a target.
enum class DepClassTy {
  REQUIRED, ///< The target cannot be valid if the source is not.
  OPTIONAL, ///< The target may be valid if the source is not.
  NONE,     ///< Do not track a dependence between source and target.
};
225///}
226
227/// The data structure for the nodes of a dependency graph
228struct AADepGraphNode {
229public:
230 virtual ~AADepGraphNode(){};
231 using DepTy = PointerIntPair<AADepGraphNode *, 1>;
232
233protected:
234 /// Set of dependency graph nodes which should be updated if this one
235 /// is updated. The bit encodes if it is optional.
236 TinyPtrVector<DepTy> Deps;
237
238 static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
239 static AbstractAttribute *DepGetValAA(DepTy &DT) {
240 return cast<AbstractAttribute>(DT.getPointer());
241 }
242
243 operator AbstractAttribute *() { return cast<AbstractAttribute>(this); }
244
245public:
246 using iterator =
247 mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
248 using aaiterator =
249 mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>;
250
251 aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); }
252 aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); }
253 iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); }
254 iterator child_end() { return iterator(Deps.end(), &DepGetVal); }
255
256 virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; }
257 TinyPtrVector<DepTy> &getDeps() { return Deps; }
258
259 friend struct Attributor;
260 friend struct AADepGraph;
261};
262
263/// The data structure for the dependency graph
264///
265/// Note that in this graph if there is an edge from A to B (A -> B),
266/// then it means that B depends on A, and when the state of A is
267/// updated, node B should also be updated
268struct AADepGraph {
269 AADepGraph() {}
270 ~AADepGraph() {}
271
272 using DepTy = AADepGraphNode::DepTy;
273 static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
274 using iterator =
275 mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
276
277 /// There is no root node for the dependency graph. But the SCCIterator
278 /// requires a single entry point, so we maintain a fake("synthetic") root
279 /// node that depends on every node.
280 AADepGraphNode SyntheticRoot;
281 AADepGraphNode *GetEntryNode() { return &SyntheticRoot; }
282
283 iterator begin() { return SyntheticRoot.child_begin(); }
284 iterator end() { return SyntheticRoot.child_end(); }
285
286 void viewGraph();
287
288 /// Dump graph to file
289 void dumpGraph();
290
291 /// Print dependency graph
292 void print();
293};
294
295/// Helper to describe and deal with positions in the LLVM-IR.
296///
297/// A position in the IR is described by an anchor value and an "offset" that
298/// could be the argument number, for call sites and arguments, or an indicator
299/// of the "position kind". The kinds, specified in the Kind enum below, include
300/// the locations in the attribute list, i.a., function scope and return value,
301/// as well as a distinction between call sites and functions. Finally, there
302/// are floating values that do not have a corresponding attribute list
303/// position.
304struct IRPosition {
305 // NOTE: In the future this definition can be changed to support recursive
306 // functions.
307 using CallBaseContext = CallBase;
308
309 /// The positions we distinguish in the IR.
310 enum Kind : char {
311 IRP_INVALID, ///< An invalid position.
312 IRP_FLOAT, ///< A position that is not associated with a spot suitable
313 ///< for attributes. This could be any value or instruction.
314 IRP_RETURNED, ///< An attribute for the function return value.
315 IRP_CALL_SITE_RETURNED, ///< An attribute for a call site return value.
316 IRP_FUNCTION, ///< An attribute for a function (scope).
317 IRP_CALL_SITE, ///< An attribute for a call site (function scope).
318 IRP_ARGUMENT, ///< An attribute for a function argument.
319 IRP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
320 };
321
322 /// Default constructor available to create invalid positions implicitly. All
323 /// other positions need to be created explicitly through the appropriate
324 /// static member function.
325 IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); }
326
327 /// Create a position describing the value of \p V.
328 static const IRPosition value(const Value &V,
329 const CallBaseContext *CBContext = nullptr) {
330 if (auto *Arg = dyn_cast<Argument>(&V))
331 return IRPosition::argument(*Arg, CBContext);
332 if (auto *CB = dyn_cast<CallBase>(&V))
333 return IRPosition::callsite_returned(*CB);
334 return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext);
335 }
336
337 /// Create a position describing the function scope of \p F.
338 /// \p CBContext is used for call base specific analysis.
339 static const IRPosition function(const Function &F,
340 const CallBaseContext *CBContext = nullptr) {
341 return IRPosition(const_cast<Function &>(F), IRP_FUNCTION, CBContext);
342 }
343
344 /// Create a position describing the returned value of \p F.
345 /// \p CBContext is used for call base specific analysis.
346 static const IRPosition returned(const Function &F,
347 const CallBaseContext *CBContext = nullptr) {
348 return IRPosition(const_cast<Function &>(F), IRP_RETURNED, CBContext);
349 }
350
351 /// Create a position describing the argument \p Arg.
352 /// \p CBContext is used for call base specific analysis.
353 static const IRPosition argument(const Argument &Arg,
354 const CallBaseContext *CBContext = nullptr) {
355 return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT, CBContext);
356 }
357
358 /// Create a position describing the function scope of \p CB.
359 static const IRPosition callsite_function(const CallBase &CB) {
360 return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE);
361 }
362
363 /// Create a position describing the returned value of \p CB.
364 static const IRPosition callsite_returned(const CallBase &CB) {
365 return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED);
366 }
367
368 /// Create a position describing the argument of \p CB at position \p ArgNo.
369 static const IRPosition callsite_argument(const CallBase &CB,
370 unsigned ArgNo) {
371 return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)),
372 IRP_CALL_SITE_ARGUMENT);
373 }
374
375 /// Create a position describing the argument of \p ACS at position \p ArgNo.
376 static const IRPosition callsite_argument(AbstractCallSite ACS,
377 unsigned ArgNo) {
378 if (ACS.getNumArgOperands() <= ArgNo)
379 return IRPosition();
380 int CSArgNo = ACS.getCallArgOperandNo(ArgNo);
381 if (CSArgNo >= 0)
382 return IRPosition::callsite_argument(
383 cast<CallBase>(*ACS.getInstruction()), CSArgNo);
384 return IRPosition();
385 }
386
387 /// Create a position with function scope matching the "context" of \p IRP.
388 /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result
389 /// will be a call site position, otherwise the function position of the
390 /// associated function.
391 static const IRPosition
392 function_scope(const IRPosition &IRP,
393 const CallBaseContext *CBContext = nullptr) {
394 if (IRP.isAnyCallSitePosition()) {
395 return IRPosition::callsite_function(
396 cast<CallBase>(IRP.getAnchorValue()));
397 }
398 assert(IRP.getAssociatedFunction())(static_cast <bool> (IRP.getAssociatedFunction()) ? void
(0) : __assert_fail ("IRP.getAssociatedFunction()", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 398, __extension__ __PRETTY_FUNCTION__))
;
399 return IRPosition::function(*IRP.getAssociatedFunction(), CBContext);
400 }
401
402 bool operator==(const IRPosition &RHS) const {
403 return Enc == RHS.Enc && RHS.CBContext == CBContext;
404 }
405 bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); }
406
407 /// Return the value this abstract attribute is anchored with.
408 ///
409 /// The anchor value might not be the associated value if the latter is not
410 /// sufficient to determine where arguments will be manifested. This is, so
411 /// far, only the case for call site arguments as the value is not sufficient
412 /// to pinpoint them. Instead, we can use the call site as an anchor.
413 Value &getAnchorValue() const {
414 switch (getEncodingBits()) {
415 case ENC_VALUE:
416 case ENC_RETURNED_VALUE:
417 case ENC_FLOATING_FUNCTION:
418 return *getAsValuePtr();
419 case ENC_CALL_SITE_ARGUMENT_USE:
420 return *(getAsUsePtr()->getUser());
421 default:
422 llvm_unreachable("Unkown encoding!")::llvm::llvm_unreachable_internal("Unkown encoding!", "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 422)
;
423 };
424 }
425
426 /// Return the associated function, if any.
427 Function *getAssociatedFunction() const {
428 if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
429 // We reuse the logic that associates callback calles to arguments of a
430 // call site here to identify the callback callee as the associated
431 // function.
432 if (Argument *Arg = getAssociatedArgument())
433 return Arg->getParent();
434 return CB->getCalledFunction();
435 }
436 return getAnchorScope();
437 }
438
439 /// Return the associated argument, if any.
440 Argument *getAssociatedArgument() const;
441
442 /// Return true if the position refers to a function interface, that is the
443 /// function scope, the function return, or an argument.
444 bool isFnInterfaceKind() const {
445 switch (getPositionKind()) {
446 case IRPosition::IRP_FUNCTION:
447 case IRPosition::IRP_RETURNED:
448 case IRPosition::IRP_ARGUMENT:
449 return true;
450 default:
451 return false;
452 }
453 }
454
455 /// Return the Function surrounding the anchor value.
456 Function *getAnchorScope() const {
457 Value &V = getAnchorValue();
458 if (isa<Function>(V))
459 return &cast<Function>(V);
460 if (isa<Argument>(V))
461 return cast<Argument>(V).getParent();
462 if (isa<Instruction>(V))
463 return cast<Instruction>(V).getFunction();
464 return nullptr;
465 }
466
467 /// Return the context instruction, if any.
468 Instruction *getCtxI() const {
469 Value &V = getAnchorValue();
470 if (auto *I = dyn_cast<Instruction>(&V))
471 return I;
472 if (auto *Arg = dyn_cast<Argument>(&V))
473 if (!Arg->getParent()->isDeclaration())
474 return &Arg->getParent()->getEntryBlock().front();
475 if (auto *F = dyn_cast<Function>(&V))
476 if (!F->isDeclaration())
477 return &(F->getEntryBlock().front());
478 return nullptr;
479 }
480
481 /// Return the value this abstract attribute is associated with.
482 Value &getAssociatedValue() const {
483 if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue()))
484 return getAnchorValue();
485 assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!")(static_cast <bool> (isa<CallBase>(&getAnchorValue
()) && "Expected a call base!") ? void (0) : __assert_fail
("isa<CallBase>(&getAnchorValue()) && \"Expected a call base!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 485, __extension__ __PRETTY_FUNCTION__))
;
486 return *cast<CallBase>(&getAnchorValue())
487 ->getArgOperand(getCallSiteArgNo());
488 }
489
490 /// Return the type this abstract attribute is associated with.
491 Type *getAssociatedType() const {
492 if (getPositionKind() == IRPosition::IRP_RETURNED)
493 return getAssociatedFunction()->getReturnType();
494 return getAssociatedValue().getType();
495 }
496
497 /// Return the callee argument number of the associated value if it is an
498 /// argument or call site argument, otherwise a negative value. In contrast to
499 /// `getCallSiteArgNo` this method will always return the "argument number"
500 /// from the perspective of the callee. This may not the same as the call site
501 /// if this is a callback call.
502 int getCalleeArgNo() const {
503 return getArgNo(/* CallbackCalleeArgIfApplicable */ true);
504 }
505
506 /// Return the call site argument number of the associated value if it is an
507 /// argument or call site argument, otherwise a negative value. In contrast to
508 /// `getCalleArgNo` this method will always return the "operand number" from
509 /// the perspective of the call site. This may not the same as the callee
510 /// perspective if this is a callback call.
511 int getCallSiteArgNo() const {
512 return getArgNo(/* CallbackCalleeArgIfApplicable */ false);
513 }
514
515 /// Return the index in the attribute list for this position.
516 unsigned getAttrIdx() const {
517 switch (getPositionKind()) {
518 case IRPosition::IRP_INVALID:
519 case IRPosition::IRP_FLOAT:
520 break;
521 case IRPosition::IRP_FUNCTION:
522 case IRPosition::IRP_CALL_SITE:
523 return AttributeList::FunctionIndex;
524 case IRPosition::IRP_RETURNED:
525 case IRPosition::IRP_CALL_SITE_RETURNED:
526 return AttributeList::ReturnIndex;
527 case IRPosition::IRP_ARGUMENT:
528 case IRPosition::IRP_CALL_SITE_ARGUMENT:
529 return getCallSiteArgNo() + AttributeList::FirstArgIndex;
530 }
531 llvm_unreachable(::llvm::llvm_unreachable_internal("There is no attribute index for a floating or invalid position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 532)
532 "There is no attribute index for a floating or invalid position!")::llvm::llvm_unreachable_internal("There is no attribute index for a floating or invalid position!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 532)
;
533 }
534
535 /// Return the associated position kind.
536 Kind getPositionKind() const {
537 char EncodingBits = getEncodingBits();
538 if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE)
539 return IRP_CALL_SITE_ARGUMENT;
540 if (EncodingBits == ENC_FLOATING_FUNCTION)
541 return IRP_FLOAT;
542
543 Value *V = getAsValuePtr();
544 if (!V)
545 return IRP_INVALID;
546 if (isa<Argument>(V))
547 return IRP_ARGUMENT;
548 if (isa<Function>(V))
549 return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION;
550 if (isa<CallBase>(V))
551 return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED
552 : IRP_CALL_SITE;
553 return IRP_FLOAT;
554 }
555
556 /// TODO: Figure out if the attribute related helper functions should live
557 /// here or somewhere else.
558
559 /// Return true if any kind in \p AKs existing in the IR at a position that
560 /// will affect this one. See also getAttrs(...).
561 /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
562 /// e.g., the function position if this is an
563 /// argument position, should be ignored.
564 bool hasAttr(ArrayRef<Attribute::AttrKind> AKs,
565 bool IgnoreSubsumingPositions = false,
566 Attributor *A = nullptr) const;
567
568 /// Return the attributes of any kind in \p AKs existing in the IR at a
569 /// position that will affect this one. While each position can only have a
570 /// single attribute of any kind in \p AKs, there are "subsuming" positions
571 /// that could have an attribute as well. This method returns all attributes
572 /// found in \p Attrs.
573 /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
574 /// e.g., the function position if this is an
575 /// argument position, should be ignored.
576 void getAttrs(ArrayRef<Attribute::AttrKind> AKs,
577 SmallVectorImpl<Attribute> &Attrs,
578 bool IgnoreSubsumingPositions = false,
579 Attributor *A = nullptr) const;
580
581 /// Remove the attribute of kind \p AKs existing in the IR at this position.
582 void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const {
583 if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
584 return;
585
586 AttributeList AttrList;
587 auto *CB = dyn_cast<CallBase>(&getAnchorValue());
588 if (CB)
589 AttrList = CB->getAttributes();
590 else
591 AttrList = getAssociatedFunction()->getAttributes();
592
593 LLVMContext &Ctx = getAnchorValue().getContext();
594 for (Attribute::AttrKind AK : AKs)
595 AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK);
596
597 if (CB)
598 CB->setAttributes(AttrList);
599 else
600 getAssociatedFunction()->setAttributes(AttrList);
601 }
602
603 bool isAnyCallSitePosition() const {
604 switch (getPositionKind()) {
605 case IRPosition::IRP_CALL_SITE:
606 case IRPosition::IRP_CALL_SITE_RETURNED:
607 case IRPosition::IRP_CALL_SITE_ARGUMENT:
608 return true;
609 default:
610 return false;
611 }
612 }
613
614 /// Return true if the position is an argument or call site argument.
615 bool isArgumentPosition() const {
616 switch (getPositionKind()) {
617 case IRPosition::IRP_ARGUMENT:
618 case IRPosition::IRP_CALL_SITE_ARGUMENT:
619 return true;
620 default:
621 return false;
622 }
623 }
624
625 /// Return the same position without the call base context.
626 IRPosition stripCallBaseContext() const {
627 IRPosition Result = *this;
628 Result.CBContext = nullptr;
629 return Result;
630 }
631
632 /// Get the call base context from the position.
633 const CallBaseContext *getCallBaseContext() const { return CBContext; }
634
635 /// Check if the position has any call base context.
636 bool hasCallBaseContext() const { return CBContext != nullptr; }
637
638 /// Special DenseMap key values.
639 ///
640 ///{
641 static const IRPosition EmptyKey;
642 static const IRPosition TombstoneKey;
643 ///}
644
645 /// Conversion into a void * to allow reuse of pointer hashing.
646 operator void *() const { return Enc.getOpaqueValue(); }
647
648private:
649 /// Private constructor for special values only!
650 explicit IRPosition(void *Ptr, const CallBaseContext *CBContext = nullptr)
651 : CBContext(CBContext) {
652 Enc.setFromOpaqueValue(Ptr);
653 }
654
655 /// IRPosition anchored at \p AnchorVal with kind/argument numbet \p PK.
656 explicit IRPosition(Value &AnchorVal, Kind PK,
657 const CallBaseContext *CBContext = nullptr)
658 : CBContext(CBContext) {
659 switch (PK) {
660 case IRPosition::IRP_INVALID:
661 llvm_unreachable("Cannot create invalid IRP with an anchor value!")::llvm::llvm_unreachable_internal("Cannot create invalid IRP with an anchor value!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 661)
;
662 break;
663 case IRPosition::IRP_FLOAT:
664 // Special case for floating functions.
665 if (isa<Function>(AnchorVal))
666 Enc = {&AnchorVal, ENC_FLOATING_FUNCTION};
667 else
668 Enc = {&AnchorVal, ENC_VALUE};
669 break;
670 case IRPosition::IRP_FUNCTION:
671 case IRPosition::IRP_CALL_SITE:
672 Enc = {&AnchorVal, ENC_VALUE};
673 break;
674 case IRPosition::IRP_RETURNED:
675 case IRPosition::IRP_CALL_SITE_RETURNED:
676 Enc = {&AnchorVal, ENC_RETURNED_VALUE};
677 break;
678 case IRPosition::IRP_ARGUMENT:
679 Enc = {&AnchorVal, ENC_VALUE};
680 break;
681 case IRPosition::IRP_CALL_SITE_ARGUMENT:
682 llvm_unreachable(::llvm::llvm_unreachable_internal("Cannot create call site argument IRP with an anchor value!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 683)
683 "Cannot create call site argument IRP with an anchor value!")::llvm::llvm_unreachable_internal("Cannot create call site argument IRP with an anchor value!"
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 683)
;
684 break;
685 }
686 verify();
687 }
688
689 /// Return the callee argument number of the associated value if it is an
690 /// argument or call site argument. See also `getCalleeArgNo` and
691 /// `getCallSiteArgNo`.
692 int getArgNo(bool CallbackCalleeArgIfApplicable) const {
693 if (CallbackCalleeArgIfApplicable)
694 if (Argument *Arg = getAssociatedArgument())
695 return Arg->getArgNo();
696 switch (getPositionKind()) {
697 case IRPosition::IRP_ARGUMENT:
698 return cast<Argument>(getAsValuePtr())->getArgNo();
699 case IRPosition::IRP_CALL_SITE_ARGUMENT: {
700 Use &U = *getAsUsePtr();
701 return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
702 }
703 default:
704 return -1;
705 }
706 }
707
708 /// IRPosition for the use \p U. The position kind \p PK needs to be
709 /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value
710 /// the used value.
711 explicit IRPosition(Use &U, Kind PK) {
712 assert(PK == IRP_CALL_SITE_ARGUMENT &&(static_cast <bool> (PK == IRP_CALL_SITE_ARGUMENT &&
"Use constructor is for call site arguments only!") ? void (
0) : __assert_fail ("PK == IRP_CALL_SITE_ARGUMENT && \"Use constructor is for call site arguments only!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 713, __extension__ __PRETTY_FUNCTION__))
713 "Use constructor is for call site arguments only!")(static_cast <bool> (PK == IRP_CALL_SITE_ARGUMENT &&
"Use constructor is for call site arguments only!") ? void (
0) : __assert_fail ("PK == IRP_CALL_SITE_ARGUMENT && \"Use constructor is for call site arguments only!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 713, __extension__ __PRETTY_FUNCTION__))
;
714 Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE};
715 verify();
716 }
717
718 /// Verify internal invariants.
719 void verify();
720
721 /// Return the attributes of kind \p AK existing in the IR as attribute.
722 bool getAttrsFromIRAttr(Attribute::AttrKind AK,
723 SmallVectorImpl<Attribute> &Attrs) const;
724
725 /// Return the attributes of kind \p AK existing in the IR as operand bundles
726 /// of an llvm.assume.
727 bool getAttrsFromAssumes(Attribute::AttrKind AK,
728 SmallVectorImpl<Attribute> &Attrs,
729 Attributor &A) const;
730
731 /// Return the underlying pointer as Value *, valid for all positions but
732 /// IRP_CALL_SITE_ARGUMENT.
733 Value *getAsValuePtr() const {
734 assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE &&(static_cast <bool> (getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE
&& "Not a value pointer!") ? void (0) : __assert_fail
("getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 735, __extension__ __PRETTY_FUNCTION__))
735 "Not a value pointer!")(static_cast <bool> (getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE
&& "Not a value pointer!") ? void (0) : __assert_fail
("getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 735, __extension__ __PRETTY_FUNCTION__))
;
736 return reinterpret_cast<Value *>(Enc.getPointer());
737 }
738
739 /// Return the underlying pointer as Use *, valid only for
740 /// IRP_CALL_SITE_ARGUMENT positions.
741 Use *getAsUsePtr() const {
742 assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE &&(static_cast <bool> (getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE
&& "Not a value pointer!") ? void (0) : __assert_fail
("getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 743, __extension__ __PRETTY_FUNCTION__))
743 "Not a value pointer!")(static_cast <bool> (getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE
&& "Not a value pointer!") ? void (0) : __assert_fail
("getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && \"Not a value pointer!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 743, __extension__ __PRETTY_FUNCTION__))
;
744 return reinterpret_cast<Use *>(Enc.getPointer());
745 }
746
747 /// Return true if \p EncodingBits describe a returned or call site returned
748 /// position.
749 static bool isReturnPosition(char EncodingBits) {
750 return EncodingBits == ENC_RETURNED_VALUE;
751 }
752
753 /// Return true if the encoding bits describe a returned or call site returned
754 /// position.
755 bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); }
756
757 /// The encoding of the IRPosition is a combination of a pointer and two
758 /// encoding bits. The values of the encoding bits are defined in the enum
759 /// below. The pointer is either a Value* (for the first three encoding bit
760 /// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE).
761 ///
762 ///{
763 enum {
764 ENC_VALUE = 0b00,
765 ENC_RETURNED_VALUE = 0b01,
766 ENC_FLOATING_FUNCTION = 0b10,
767 ENC_CALL_SITE_ARGUMENT_USE = 0b11,
768 };
769
770 // Reserve the maximal amount of bits so there is no need to mask out the
771 // remaining ones. We will not encode anything else in the pointer anyway.
772 static constexpr int NumEncodingBits =
773 PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
774 static_assert(NumEncodingBits >= 2, "At least two bits are required!");
775
776 /// The pointer with the encoding bits.
777 PointerIntPair<void *, NumEncodingBits, char> Enc;
778 ///}
779
780 /// Call base context. Used for callsite specific analysis.
781 const CallBaseContext *CBContext = nullptr;
782
783 /// Return the encoding bits.
784 char getEncodingBits() const { return Enc.getInt(); }
785};
786
787/// Helper that allows IRPosition as a key in a DenseMap.
788template <> struct DenseMapInfo<IRPosition> {
789 static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; }
790 static inline IRPosition getTombstoneKey() {
791 return IRPosition::TombstoneKey;
792 }
793 static unsigned getHashValue(const IRPosition &IRP) {
794 return (DenseMapInfo<void *>::getHashValue(IRP) << 4) ^
795 (DenseMapInfo<Value *>::getHashValue(IRP.getCallBaseContext()));
796 }
797
798 static bool isEqual(const IRPosition &a, const IRPosition &b) {
799 return a == b;
800 }
801};
802
803/// A visitor class for IR positions.
804///
805/// Given a position P, the SubsumingPositionIterator allows to visit "subsuming
806/// positions" wrt. attributes/information. Thus, if a piece of information
807/// holds for a subsuming position, it also holds for the position P.
808///
809/// The subsuming positions always include the initial position and then,
810/// depending on the position kind, additionally the following ones:
811/// - for IRP_RETURNED:
812/// - the function (IRP_FUNCTION)
813/// - for IRP_ARGUMENT:
814/// - the function (IRP_FUNCTION)
815/// - for IRP_CALL_SITE:
816/// - the callee (IRP_FUNCTION), if known
817/// - for IRP_CALL_SITE_RETURNED:
818/// - the callee (IRP_RETURNED), if known
819/// - the call site (IRP_FUNCTION)
820/// - the callee (IRP_FUNCTION), if known
821/// - for IRP_CALL_SITE_ARGUMENT:
822/// - the argument of the callee (IRP_ARGUMENT), if known
823/// - the callee (IRP_FUNCTION), if known
824/// - the position the call site argument is associated with if it is not
825/// anchored to the call site, e.g., if it is an argument then the argument
826/// (IRP_ARGUMENT)
827class SubsumingPositionIterator {
828 SmallVector<IRPosition, 4> IRPositions;
829 using iterator = decltype(IRPositions)::iterator;
830
831public:
832 SubsumingPositionIterator(const IRPosition &IRP);
833 iterator begin() { return IRPositions.begin(); }
834 iterator end() { return IRPositions.end(); }
835};
836
837/// Wrapper for FunctionAnalysisManager.
838struct AnalysisGetter {
839 template <typename Analysis>
840 typename Analysis::Result *getAnalysis(const Function &F) {
841 if (!FAM || !F.getParent())
842 return nullptr;
843 return &FAM->getResult<Analysis>(const_cast<Function &>(F));
844 }
845
846 AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
847 AnalysisGetter() {}
848
849private:
850 FunctionAnalysisManager *FAM = nullptr;
851};
852
853/// Data structure to hold cached (LLVM-IR) information.
854///
855/// All attributes are given an InformationCache object at creation time to
856/// avoid inspection of the IR by all of them individually. This default
857/// InformationCache will hold information required by 'default' attributes,
858/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
859/// is called.
860///
861/// If custom abstract attributes, registered manually through
862/// Attributor::registerAA(...), need more information, especially if it is not
863/// reusable, it is advised to inherit from the InformationCache and cast the
864/// instance down in the abstract attributes.
865struct InformationCache {
866 InformationCache(const Module &M, AnalysisGetter &AG,
867 BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC)
868 : DL(M.getDataLayout()), Allocator(Allocator),
869 Explorer(
870 /* ExploreInterBlock */ true, /* ExploreCFGForward */ true,
871 /* ExploreCFGBackward */ true,
872 /* LIGetter */
873 [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); },
874 /* DTGetter */
875 [&](const Function &F) {
876 return AG.getAnalysis<DominatorTreeAnalysis>(F);
877 },
878 /* PDTGetter */
879 [&](const Function &F) {
880 return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
881 }),
882 AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) {
883 if (CGSCC)
884 initializeModuleSlice(*CGSCC);
885 }
886
887 ~InformationCache() {
888 // The FunctionInfo objects are allocated via a BumpPtrAllocator, we call
889 // the destructor manually.
890 for (auto &It : FuncInfoMap)
891 It.getSecond()->~FunctionInfo();
892 }
893
894 /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
895 /// true, constant expression users are not given to \p CB but their uses are
896 /// traversed transitively.
897 template <typename CBTy>
898 static void foreachUse(Function &F, CBTy CB,
899 bool LookThroughConstantExprUses = true) {
900 SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));
901
902 for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) {
903 Use &U = *Worklist[Idx];
904
905 // Allow use in constant bitcasts and simply look through them.
906 if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
907 for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
908 Worklist.push_back(&CEU);
909 continue;
910 }
911
912 CB(U);
913 }
914 }
915
916 /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains
917 /// (a subset of) all functions that we can look at during this SCC traversal.
918 /// This includes functions (transitively) called from the SCC and the
919 /// (transitive) callers of SCC functions. We also can look at a function if
919 /// there is a "reference edge", i.e., if the function somehow uses (!=calls)
921 /// a function in the SCC or a caller of a function in the SCC.
922 void initializeModuleSlice(SetVector<Function *> &SCC) {
923 ModuleSlice.insert(SCC.begin(), SCC.end());
924
925 SmallPtrSet<Function *, 16> Seen;
926 SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
927 while (!Worklist.empty()) {
928 Function *F = Worklist.pop_back_val();
929 ModuleSlice.insert(F);
930
931 for (Instruction &I : instructions(*F))
932 if (auto *CB = dyn_cast<CallBase>(&I))
933 if (Function *Callee = CB->getCalledFunction())
934 if (Seen.insert(Callee).second)
935 Worklist.push_back(Callee);
936 }
937
938 Seen.clear();
939 Worklist.append(SCC.begin(), SCC.end());
940 while (!Worklist.empty()) {
941 Function *F = Worklist.pop_back_val();
942 ModuleSlice.insert(F);
943
944 // Traverse all transitive uses.
945 foreachUse(*F, [&](Use &U) {
946 if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
947 if (Seen.insert(UsrI->getFunction()).second)
948 Worklist.push_back(UsrI->getFunction());
949 });
950 }
951 }
952
953 /// The slice of the module we are allowed to look at.
954 SmallPtrSet<Function *, 8> ModuleSlice;
955
956 /// A vector type to hold instructions.
957 using InstructionVectorTy = SmallVector<Instruction *, 8>;
958
959 /// A map type from opcodes to instructions with this opcode.
960 using OpcodeInstMapTy = DenseMap<unsigned, InstructionVectorTy *>;
961
962 /// Return the map that relates "interesting" opcodes with all instructions
963 /// with that opcode in \p F.
964 OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) {
965 return getFunctionInfo(F).OpcodeInstMap;
966 }
967
968 /// Return the instructions in \p F that may read or write memory.
969 InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) {
970 return getFunctionInfo(F).RWInsts;
971 }
972
973 /// Return MustBeExecutedContextExplorer
974 MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() {
975 return Explorer;
976 }
977
978 /// Return TargetLibraryInfo for function \p F.
979 TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) {
980 return AG.getAnalysis<TargetLibraryAnalysis>(F);
981 }
982
983 /// Return AliasAnalysis Result for function \p F.
984 AAResults *getAAResultsForFunction(const Function &F);
985
986 /// Return true if \p Arg is involved in a must-tail call, thus the argument
987 /// of the caller or callee.
988 bool isInvolvedInMustTailCall(const Argument &Arg) {
989 FunctionInfo &FI = getFunctionInfo(*Arg.getParent());
990 return FI.CalledViaMustTail || FI.ContainsMustTailCall;
991 }
992
993 /// Return the analysis result from a pass \p AP for function \p F.
994 template <typename AP>
995 typename AP::Result *getAnalysisResultForFunction(const Function &F) {
996 return AG.getAnalysis<AP>(F);
997 }
998
999 /// Return SCC size on call graph for function \p F or 0 if unknown.
1000 unsigned getSccSize(const Function &F) {
1001 if (CGSCC && CGSCC->count(const_cast<Function *>(&F)))
1002 return CGSCC->size();
1003 return 0;
1004 }
1005
1006 /// Return datalayout used in the module.
1007 const DataLayout &getDL() { return DL; }
1008
1009 /// Return the map containing all the knowledge we have from `llvm.assume`s.
1010 const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; }
1011
1012 /// Return whether \p To is potentially reachable from \p From.
1013 /// If the same query was answered before, return the cached result.
1014 bool getPotentiallyReachable(const Instruction &From, const Instruction &To) {
1015 auto KeyPair = std::make_pair(&From, &To);
1016 auto Iter = PotentiallyReachableMap.find(KeyPair);
1017 if (Iter != PotentiallyReachableMap.end())
1018 return Iter->second;
1019 const Function &F = *From.getFunction();
1020 bool Result = true;
1021 if (From.getFunction() == To.getFunction())
1022 Result = isPotentiallyReachable(&From, &To, nullptr,
1023 AG.getAnalysis<DominatorTreeAnalysis>(F),
1024 AG.getAnalysis<LoopAnalysis>(F));
1025 PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result));
1026 return Result;
1027 }
1028
1029 /// Check whether \p F is part of module slice.
1030 bool isInModuleSlice(const Function &F) {
1031 return ModuleSlice.count(const_cast<Function *>(&F));
1032 }
1033
1034 /// Return true if the stack (llvm::Alloca) can be accessed by other threads.
1035 bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); }
1036
1037 /// Return true if the target is a GPU.
1038 bool targetIsGPU() {
1039 return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX();
1040 }
1041
1042private:
1043 struct FunctionInfo {
1044 ~FunctionInfo();
1045
1046 /// A nested map that remembers all instructions in a function with a
1047 /// certain instruction opcode (Instruction::getOpcode()).
1048 OpcodeInstMapTy OpcodeInstMap;
1049
1050 /// The instructions of the function that may read or write
1051 /// memory.
1052 InstructionVectorTy RWInsts;
1053
1054 /// Function is called by a `musttail` call.
1055 bool CalledViaMustTail;
1056
1057 /// Function contains a `musttail` call.
1058 bool ContainsMustTailCall;
1059 };
1060
1061 /// A map type from functions to information about them.
1062 DenseMap<const Function *, FunctionInfo *> FuncInfoMap;
1063
1064 /// Return information about the function \p F, potentially by creating it.
1065 FunctionInfo &getFunctionInfo(const Function &F) {
1066 FunctionInfo *&FI = FuncInfoMap[&F];
1067 if (!FI) {
1068 FI = new (Allocator) FunctionInfo();
1069 initializeInformationCache(F, *FI);
1070 }
1071 return *FI;
1072 }
1073
1074 /// Initialize the function information cache \p FI for the function \p F.
1075 ///
1076 /// This method needs to be called for all function that might be looked at
1077 /// through the information cache interface *prior* to looking at them.
1078 void initializeInformationCache(const Function &F, FunctionInfo &FI);
1079
1080 /// The datalayout used in the module.
1081 const DataLayout &DL;
1082
1083 /// The allocator used to allocate memory, e.g. for `FunctionInfo`s.
1084 BumpPtrAllocator &Allocator;
1085
1086 /// MustBeExecutedContextExplorer
1087 MustBeExecutedContextExplorer Explorer;
1088
1089 /// A map with knowledge retained in `llvm.assume` instructions.
1090 RetainedKnowledgeMap KnowledgeMap;
1091
1092 /// Getters for analysis.
1093 AnalysisGetter &AG;
1094
1095 /// The underlying CGSCC, or null if not available.
1096 SetVector<Function *> *CGSCC;
1097
1098 /// Set of inlineable functions
1099 SmallPtrSet<const Function *, 8> InlineableFunctions;
1100
1101 /// A map for caching results of queries for isPotentiallyReachable
1102 DenseMap<std::pair<const Instruction *, const Instruction *>, bool>
1103 PotentiallyReachableMap;
1104
1105 /// The triple describing the target machine.
1106 Triple TargetTriple;
1107
1108 /// Give the Attributor access to the members so
1109 /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
1110 friend struct Attributor;
1111};
1112
1113/// The fixpoint analysis framework that orchestrates the attribute deduction.
1114///
1115/// The Attributor provides a general abstract analysis framework (guided
1116/// fixpoint iteration) as well as helper functions for the deduction of
1117/// (LLVM-IR) attributes. However, also other code properties can be deduced,
1118/// propagated, and ultimately manifested through the Attributor framework. This
1119/// is particularly useful if these properties interact with attributes and a
1120/// co-scheduled deduction allows to improve the solution. Even if not, thus if
1121/// attributes/properties are completely isolated, they should use the
1122/// Attributor framework to reduce the number of fixpoint iteration frameworks
1123/// in the code base. Note that the Attributor design makes sure that isolated
1124/// attributes are not impacted, in any way, by others derived at the same time
1125/// if there is no cross-reasoning performed.
1126///
1127/// The public facing interface of the Attributor is kept simple and basically
1128/// allows abstract attributes to do one thing: query abstract attributes
1129/// in-flight. There are two reasons to do this:
1130/// a) The optimistic state of one abstract attribute can justify an
1131/// optimistic state of another, allowing the framework to end up with an
1132/// optimistic (=best possible) fixpoint instead of one based solely on
1133/// information in the IR.
1134/// b) This avoids reimplementing various kinds of lookups, e.g., to check
1135/// for existing IR attributes, in favor of a single lookups interface
1136/// provided by an abstract attribute subclass.
1137///
1138/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
1139/// described in the file comment.
1140struct Attributor {
1141
1142 using OptimizationRemarkGetter =
1143 function_ref<OptimizationRemarkEmitter &(Function *)>;
1144
1145 /// Constructor
1146 ///
1147 /// \param Functions The set of functions we are deriving attributes for.
1148 /// \param InfoCache Cache to hold various information accessible for
1149 /// the abstract attributes.
1150 /// \param CGUpdater Helper to update an underlying call graph.
1151 /// \param Allowed If not null, a set limiting the attribute opportunities.
1152 /// \param DeleteFns Whether to delete functions.
1153 /// \param RewriteSignatures Whether to rewrite function signatures.
1154 Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
1155 CallGraphUpdater &CGUpdater,
1156 DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
1157 bool RewriteSignatures = true)
1158 : Allocator(InfoCache.Allocator), Functions(Functions),
1159 InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
1160 DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
1161 MaxFixpointIterations(None), OREGetter(None), PassName("") {}
1162
1163 /// Constructor
1164 ///
1165 /// \param Functions The set of functions we are deriving attributes for.
1166 /// \param InfoCache Cache to hold various information accessible for
1167 /// the abstract attributes.
1168 /// \param CGUpdater Helper to update an underlying call graph.
1169 /// \param Allowed If not null, a set limiting the attribute opportunities.
1170 /// \param DeleteFns Whether to delete functions
1171 /// \param RewriteSignatures Whether to rewrite function signatures.
1172 /// \param MaxFixpointIterations Maximum number of iterations to run until
1173 /// fixpoint.
1174 /// \param OREGetter A callback function that returns an ORE object from a
1175 /// Function pointer.
1176 /// \param PassName The name of the pass emitting remarks.
1177 Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
1178 CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
1179 bool DeleteFns, bool RewriteSignatures,
1180 Optional<unsigned> MaxFixpointIterations,
1181 OptimizationRemarkGetter OREGetter, const char *PassName)
1182 : Allocator(InfoCache.Allocator), Functions(Functions),
1183 InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
1184 DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
1185 MaxFixpointIterations(MaxFixpointIterations),
1186 OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
1187 PassName(PassName) {}
1188
1189 ~Attributor();
1190
1191 /// Run the analyses until a fixpoint is reached or enforced (timeout).
1192 ///
1193 /// The attributes registered with this Attributor can be used after as long
1194 /// as the Attributor is not destroyed (it owns the attributes now).
1195 ///
1196 /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED.
1197 ChangeStatus run();
1198
1199 /// Lookup an abstract attribute of type \p AAType at position \p IRP. While
1200 /// no abstract attribute is found equivalent positions are checked, see
1201 /// SubsumingPositionIterator. Thus, the returned abstract attribute
1202 /// might be anchored at a different position, e.g., the callee if \p IRP is a
1203 /// call base.
1204 ///
1205 /// This method is the only (supported) way an abstract attribute can retrieve
1206 /// information from another abstract attribute. As an example, take an
1207 /// abstract attribute that determines the memory access behavior for an
1208 /// argument (readnone, readonly, ...). It should use `getAAFor` to get the
1209 /// most optimistic information for other abstract attributes in-flight, e.g.
1210 /// the one reasoning about the "captured" state for the argument or the one
1211 /// reasoning on the memory access behavior of the function as a whole.
1212 ///
1213 /// If the DepClass enum is set to `DepClassTy::NONE` the dependence from
1214 /// \p QueryingAA to the return abstract attribute is not automatically
1215 /// recorded. This should only be used if the caller will record the
1216 /// dependence explicitly if necessary, thus if the returned abstract
1217 /// attribute is used for reasoning. To record the dependences explicitly use
1218 /// the `Attributor::recordDependence` method.
1219 template <typename AAType>
1220 const AAType &getAAFor(const AbstractAttribute &QueryingAA,
1221 const IRPosition &IRP, DepClassTy DepClass) {
1222 return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
1223 /* ForceUpdate */ false);
1224 }
1225
1226 /// Similar to getAAFor but the return abstract attribute will be updated (via
1227 /// `AbstractAttribute::update`) even if it is found in the cache. This is
1228 /// especially useful for AAIsDead as changes in liveness can make updates
1229 /// possible/useful that were not happening before as the abstract attribute
1230 /// was assumed dead.
1231 template <typename AAType>
1232 const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA,
1233 const IRPosition &IRP, DepClassTy DepClass) {
1234 return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
1235 /* ForceUpdate */ true);
1236 }
1237
1238 /// The version of getAAFor that allows to omit a querying abstract
1239 /// attribute. Using this after Attributor started running is restricted to
1240 /// only the Attributor itself. Initial seeding of AAs can be done via this
1241 /// function.
1242 /// NOTE: ForceUpdate is ignored in any stage other than the update stage.
1243 template <typename AAType>
1244 const AAType &getOrCreateAAFor(IRPosition IRP,
1245 const AbstractAttribute *QueryingAA,
1246 DepClassTy DepClass, bool ForceUpdate = false,
1247 bool UpdateAfterInit = true) {
1248 if (!shouldPropagateCallBaseContext(IRP))
1249 IRP = IRP.stripCallBaseContext();
1250
1251 if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass,
1252 /* AllowInvalidState */ true)) {
1253 if (ForceUpdate && Phase == AttributorPhase::UPDATE)
1254 updateAA(*AAPtr);
1255 return *AAPtr;
1256 }
1257
1258 // No matching attribute found, create one.
1259 // Use the static create method.
1260 auto &AA = AAType::createForPosition(IRP, *this);
1261
1262 // If we are currently seeding attributes, enforce seeding rules.
1263 if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) {
1264 AA.getState().indicatePessimisticFixpoint();
1265 return AA;
1266 }
1267
1268 registerAA(AA);
1269
1270 // For now we ignore naked and optnone functions.
1271 bool Invalidate = Allowed && !Allowed->count(&AAType::ID);
1272 const Function *FnScope = IRP.getAnchorScope();
1273 if (FnScope)
1274 Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
1275 FnScope->hasFnAttribute(Attribute::OptimizeNone);
1276
1277 // Avoid too many nested initializations to prevent a stack overflow.
1278 Invalidate |= InitializationChainLength > MaxInitializationChainLength;
1279
1280 // Bootstrap the new attribute with an initial update to propagate
1281 // information, e.g., function -> call site. If it is not on a given
1282 // Allowed we will not perform updates at all.
1283 if (Invalidate) {
1284 AA.getState().indicatePessimisticFixpoint();
1285 return AA;
1286 }
1287
1288 {
1289 TimeTraceScope TimeScope(AA.getName() + "::initialize");
1290 ++InitializationChainLength;
1291 AA.initialize(*this);
1292 --InitializationChainLength;
1293 }
1294
1295 // Initialize and update is allowed for code outside of the current function
1296 // set, but only if it is part of module slice we are allowed to look at.
1297 // Only exception is AAIsDeadFunction whose initialization is prevented
1298 // directly, since we don't want to compute it twice.
1299 if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
1300 if (!getInfoCache().isInModuleSlice(*FnScope)) {
1301 AA.getState().indicatePessimisticFixpoint();
1302 return AA;
1303 }
1304 }
1305
1306 // If this is queried in the manifest stage, we force the AA to indicate
1307 // pessimistic fixpoint immediately.
1308 if (Phase == AttributorPhase::MANIFEST) {
1309 AA.getState().indicatePessimisticFixpoint();
1310 return AA;
1311 }
1312
1313 // Allow seeded attributes to declare dependencies.
1314 // Remember the seeding state.
1315 if (UpdateAfterInit) {
1316 AttributorPhase OldPhase = Phase;
1317 Phase = AttributorPhase::UPDATE;
1318
1319 updateAA(AA);
1320
1321 Phase = OldPhase;
1322 }
1323
1324 if (QueryingAA && AA.getState().isValidState())
1325 recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
1326 DepClass);
1327 return AA;
1328 }
1329 template <typename AAType>
1330 const AAType &getOrCreateAAFor(const IRPosition &IRP) {
1331 return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr,
1332 DepClassTy::NONE);
1333 }
1334
1335 /// Return the attribute of \p AAType for \p IRP if existing and valid. This
1336 /// also allows non-AA users lookup.
1337 template <typename AAType>
1338 AAType *lookupAAFor(const IRPosition &IRP,
1339 const AbstractAttribute *QueryingAA = nullptr,
1340 DepClassTy DepClass = DepClassTy::OPTIONAL,
1341 bool AllowInvalidState = false) {
1342 static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
1343 "Cannot query an attribute with a type not derived from "
1344 "'AbstractAttribute'!");
1345 // Lookup the abstract attribute of type AAType. If found, return it after
1346 // registering a dependence of QueryingAA on the one returned attribute.
1347 AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP});
1348 if (!AAPtr)
1349 return nullptr;
1350
1351 AAType *AA = static_cast<AAType *>(AAPtr);
1352
1353 // Do not register a dependence on an attribute with an invalid state.
1354 if (DepClass != DepClassTy::NONE && QueryingAA &&
1355 AA->getState().isValidState())
1356 recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA),
1357 DepClass);
1358
1359 // Return nullptr if this attribute has an invalid state.
1360 if (!AllowInvalidState && !AA->getState().isValidState())
1361 return nullptr;
1362 return AA;
1363 }
1364
1365 /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if
1366 /// \p FromAA changes \p ToAA should be updated as well.
1367 ///
1368 /// This method should be used in conjunction with the `getAAFor` method and
1369 /// with the DepClass enum passed to the method set to None. This can
1370 /// be beneficial to avoid false dependences but it requires the users of
1371 /// `getAAFor` to explicitly record true dependences through this method.
1372 /// The \p DepClass flag indicates if the dependence is strictly necessary.
1373 /// That means for required dependences, if \p FromAA changes to an invalid
1374 /// state, \p ToAA can be moved to a pessimistic fixpoint because it required
1375 /// information from \p FromAA but none are available anymore.
1376 void recordDependence(const AbstractAttribute &FromAA,
1377 const AbstractAttribute &ToAA, DepClassTy DepClass);
1378
1379 /// Introduce a new abstract attribute into the fixpoint analysis.
1380 ///
1381 /// Note that ownership of the attribute is given to the Attributor. It will
1382 /// invoke delete for the Attributor on destruction of the Attributor.
1383 ///
1384 /// Attributes are identified by their IR position (AAType::getIRPosition())
1385 /// and the address of their static member (see AAType::ID).
1386 template <typename AAType> AAType &registerAA(AAType &AA) {
1387 static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
1388 "Cannot register an attribute with a type not derived from "
1389 "'AbstractAttribute'!");
1390 // Put the attribute in the lookup map structure and the container we use to
1391 // keep track of all attributes.
1392 const IRPosition &IRP = AA.getIRPosition();
1393 AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}];
1394
1395 assert(!AAPtr && "Attribute already in map!")(static_cast <bool> (!AAPtr && "Attribute already in map!"
) ? void (0) : __assert_fail ("!AAPtr && \"Attribute already in map!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 1395, __extension__ __PRETTY_FUNCTION__))
;
1396 AAPtr = &AA;
1397
1398 // Register AA with the synthetic root only before the manifest stage.
1399 if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE)
1400 DG.SyntheticRoot.Deps.push_back(
1401 AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED)));
1402
1403 return AA;
1404 }
1405
1406 /// Return the internal information cache.
1407 InformationCache &getInfoCache() { return InfoCache; }
1408
1409 /// Return true if this is a module pass, false otherwise.
1410 bool isModulePass() const {
1411 return !Functions.empty() &&
1412 Functions.size() == Functions.front()->getParent()->size();
1413 }
1414
1415 /// Return true if we derive attributes for \p Fn
1416 bool isRunOn(Function &Fn) const {
1417 return Functions.empty() || Functions.count(&Fn);
1418 }
1419
1420 /// Determine opportunities to derive 'default' attributes in \p F and create
1421 /// abstract attribute objects for them.
1422 ///
1423 /// \param F The function that is checked for attribute opportunities.
1424 ///
1425 /// Note that abstract attribute instances are generally created even if the
1426 /// IR already contains the information they would deduce. The most important
1427 /// reason for this is the single interface, the one of the abstract attribute
1428 /// instance, which can be queried without the need to look at the IR in
1429 /// various places.
1430 void identifyDefaultAbstractAttributes(Function &F);
1431
1432 /// Determine whether the function \p F is IPO amendable
1433 ///
1434 /// If a function is exactly defined or it has alwaysinline attribute
1435 /// and is viable to be inlined, we say it is IPO amendable
1436 bool isFunctionIPOAmendable(const Function &F) {
1437 return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F);
1438 }
1439
1440 /// Mark the internal function \p F as live.
1441 ///
1442 /// This will trigger the identification and initialization of attributes for
1443 /// \p F.
1444 void markLiveInternalFunction(const Function &F) {
1445 assert(F.hasLocalLinkage() &&(static_cast <bool> (F.hasLocalLinkage() && "Only local linkage is assumed dead initially."
) ? void (0) : __assert_fail ("F.hasLocalLinkage() && \"Only local linkage is assumed dead initially.\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 1446, __extension__ __PRETTY_FUNCTION__))
1446 "Only local linkage is assumed dead initially.")(static_cast <bool> (F.hasLocalLinkage() && "Only local linkage is assumed dead initially."
) ? void (0) : __assert_fail ("F.hasLocalLinkage() && \"Only local linkage is assumed dead initially.\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 1446, __extension__ __PRETTY_FUNCTION__))
;
1447
1448 identifyDefaultAbstractAttributes(const_cast<Function &>(F));
1449 }
1450
1451 /// Helper function to remove callsite.
1452 void removeCallSite(CallInst *CI) {
1453 if (!CI)
1454 return;
1455
1456 CGUpdater.removeCallSite(*CI);
1457 }
1458
1459 /// Record that \p U is to be replaced with \p NV after information was
1460 /// manifested. This also triggers deletion of trivially dead instructions.
1461 bool changeUseAfterManifest(Use &U, Value &NV) {
1462 Value *&V = ToBeChangedUses[&U];
1463 if (V && (V->stripPointerCasts() == NV.stripPointerCasts() ||
1464 isa_and_nonnull<UndefValue>(V)))
1465 return false;
1466 assert((!V || V == &NV || isa<UndefValue>(NV)) &&(static_cast <bool> ((!V || V == &NV || isa<UndefValue
>(NV)) && "Use was registered twice for replacement with different values!"
) ? void (0) : __assert_fail ("(!V || V == &NV || isa<UndefValue>(NV)) && \"Use was registered twice for replacement with different values!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 1467, __extension__ __PRETTY_FUNCTION__))
1467 "Use was registered twice for replacement with different values!")(static_cast <bool> ((!V || V == &NV || isa<UndefValue
>(NV)) && "Use was registered twice for replacement with different values!"
) ? void (0) : __assert_fail ("(!V || V == &NV || isa<UndefValue>(NV)) && \"Use was registered twice for replacement with different values!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 1467, __extension__ __PRETTY_FUNCTION__))
;
1468 V = &NV;
1469 return true;
1470 }
1471
1472 /// Helper function to replace all uses of \p V with \p NV. Return true if
1473 /// there is any change. The flag \p ChangeDroppable indicates if droppable
1474 /// uses should be changed too.
1475 bool changeValueAfterManifest(Value &V, Value &NV,
1476 bool ChangeDroppable = true) {
1477 auto &Entry = ToBeChangedValues[&V];
1478 Value *&CurNV = Entry.first;
1479 if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() ||
1480 isa<UndefValue>(CurNV)))
1481 return false;
1482 assert((!CurNV || CurNV == &NV || isa<UndefValue>(NV)) &&(static_cast <bool> ((!CurNV || CurNV == &NV || isa
<UndefValue>(NV)) && "Value replacement was registered twice with different values!"
) ? void (0) : __assert_fail ("(!CurNV || CurNV == &NV || isa<UndefValue>(NV)) && \"Value replacement was registered twice with different values!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 1483, __extension__ __PRETTY_FUNCTION__))
1483 "Value replacement was registered twice with different values!")(static_cast <bool> ((!CurNV || CurNV == &NV || isa
<UndefValue>(NV)) && "Value replacement was registered twice with different values!"
) ? void (0) : __assert_fail ("(!CurNV || CurNV == &NV || isa<UndefValue>(NV)) && \"Value replacement was registered twice with different values!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 1483, __extension__ __PRETTY_FUNCTION__))
;
1484 CurNV = &NV;
1485 Entry.second = ChangeDroppable;
1486 return true;
1487 }
1488
1489 /// Record that \p I is to be replaced with `unreachable` after information
1490 /// was manifested.
1491 void changeToUnreachableAfterManifest(Instruction *I) {
1492 ToBeChangedToUnreachableInsts.insert(I);
1493 }
1494
1495 /// Record that \p II has at least one dead successor block. This information
1496 /// is used, e.g., to replace \p II with a call, after information was
1497 /// manifested.
1498 void registerInvokeWithDeadSuccessor(InvokeInst &II) {
1499 InvokeWithDeadSuccessor.push_back(&II);
1500 }
1501
1502 /// Record that \p I is deleted after information was manifested. This also
1503 /// triggers deletion of trivially dead instructions.
1504 void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); }
1505
1506 /// Record that \p BB is deleted after information was manifested. This also
1507 /// triggers deletion of trivially dead instructions.
1508 void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); }
1509
1510 // Record that \p BB is added during the manifest of an AA. Added basic blocks
1511 // are preserved in the IR.
1512 void registerManifestAddedBasicBlock(BasicBlock &BB) {
1513 ManifestAddedBlocks.insert(&BB);
1514 }
1515
1516 /// Record that \p F is deleted after information was manifested.
1517 void deleteAfterManifest(Function &F) {
1518 if (DeleteFns)
1519 ToBeDeletedFunctions.insert(&F);
1520 }
1521
1522 /// If \p IRP is assumed to be a constant, return it, if it is unclear yet,
1523 /// return None, otherwise return `nullptr`.
1524 Optional<Constant *> getAssumedConstant(const IRPosition &IRP,
1525 const AbstractAttribute &AA,
1526 bool &UsedAssumedInformation);
1527 Optional<Constant *> getAssumedConstant(const Value &V,
1528 const AbstractAttribute &AA,
1529 bool &UsedAssumedInformation) {
1530 return getAssumedConstant(IRPosition::value(V), AA, UsedAssumedInformation);
1531 }
1532
1533 /// If \p V is assumed simplified, return it, if it is unclear yet,
1534 /// return None, otherwise return `nullptr`.
1535 Optional<Value *> getAssumedSimplified(const IRPosition &IRP,
1536 const AbstractAttribute &AA,
1537 bool &UsedAssumedInformation) {
1538 return getAssumedSimplified(IRP, &AA, UsedAssumedInformation);
1539 }
1540 Optional<Value *> getAssumedSimplified(const Value &V,
1541 const AbstractAttribute &AA,
1542 bool &UsedAssumedInformation) {
1543 return getAssumedSimplified(IRPosition::value(V), AA,
1544 UsedAssumedInformation);
1545 }
1546
1547 /// If \p V is assumed simplified, return it, if it is unclear yet,
1548 /// return None, otherwise return `nullptr`. Same as the public version
1549 /// except that it can be used without recording dependences on any \p AA.
1550 Optional<Value *> getAssumedSimplified(const IRPosition &V,
1551 const AbstractAttribute *AA,
1552 bool &UsedAssumedInformation);
1553
1554 /// Register \p CB as a simplification callback.
1555 /// `Attributor::getAssumedSimplified` will use these callbacks before
1556 /// it will ask `AAValueSimplify`. It is important to ensure this
1557 /// is called before `identifyDefaultAbstractAttributes`, assuming the
1558 /// latter is called at all.
1559 using SimplifictionCallbackTy = std::function<Optional<Value *>(
1560 const IRPosition &, const AbstractAttribute *, bool &)>;
1561 void registerSimplificationCallback(const IRPosition &IRP,
1562 const SimplifictionCallbackTy &CB) {
1563 SimplificationCallbacks[IRP].emplace_back(CB);
1564 }
1565
1566 /// Return true if there is a simplification callback for \p IRP.
1567 bool hasSimplificationCallback(const IRPosition &IRP) {
1568 return SimplificationCallbacks.count(IRP);
1569 }
1570
1571private:
1572 /// The vector with all simplification callbacks registered by outside AAs.
1573 DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
1574 SimplificationCallbacks;
1575
1576public:
1577 /// Translate \p V from the callee context into the call site context.
1578 Optional<Value *>
1579 translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB,
1580 const AbstractAttribute &AA,
1581 bool &UsedAssumedInformation);
1582
1583 /// Return true if \p AA (or its context instruction) is assumed dead.
1584 ///
1585 /// If \p LivenessAA is not provided it is queried.
1586 bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA,
1587 bool &UsedAssumedInformation,
1588 bool CheckBBLivenessOnly = false,
1589 DepClassTy DepClass = DepClassTy::OPTIONAL);
1590
1591 /// Return true if \p I is assumed dead.
1592 ///
1593 /// If \p LivenessAA is not provided it is queried.
1594 bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA,
1595 const AAIsDead *LivenessAA, bool &UsedAssumedInformation,
1596 bool CheckBBLivenessOnly = false,
1597 DepClassTy DepClass = DepClassTy::OPTIONAL);
1598
1599 /// Return true if \p U is assumed dead.
1600 ///
1601 /// If \p FnLivenessAA is not provided it is queried.
1602 bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA,
1603 const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
1604 bool CheckBBLivenessOnly = false,
1605 DepClassTy DepClass = DepClassTy::OPTIONAL);
1606
1607 /// Return true if \p IRP is assumed dead.
1608 ///
1609 /// If \p FnLivenessAA is not provided it is queried.
1610 bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA,
1611 const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
1612 bool CheckBBLivenessOnly = false,
1613 DepClassTy DepClass = DepClassTy::OPTIONAL);
1614
1615 /// Return true if \p BB is assumed dead.
1616 ///
1617 /// If \p FnLivenessAA is not provided it is queried.
1618 bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
1619 const AAIsDead *FnLivenessAA,
1620 DepClassTy DepClass = DepClassTy::OPTIONAL);
1621
1622 /// Check \p Pred on all (transitive) uses of \p V.
1623 ///
1624 /// This method will evaluate \p Pred on all (transitive) uses of the
1625 /// associated value and return true if \p Pred holds every time.
1626 bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
1627 const AbstractAttribute &QueryingAA, const Value &V,
1628 bool CheckBBLivenessOnly = false,
1629 DepClassTy LivenessDepClass = DepClassTy::OPTIONAL);
1630
1631 /// Emit a remark generically.
1632 ///
1633 /// This template function can be used to generically emit a remark. The
1634 /// RemarkKind should be one of the following:
1635 /// - OptimizationRemark to indicate a successful optimization attempt
1636 /// - OptimizationRemarkMissed to report a failed optimization attempt
1637 /// - OptimizationRemarkAnalysis to provide additional information about an
1638 /// optimization attempt
1639 ///
1640 /// The remark is built using a callback function \p RemarkCB that takes a
1641 /// RemarkKind as input and returns a RemarkKind.
1642 template <typename RemarkKind, typename RemarkCallBack>
1643 void emitRemark(Instruction *I, StringRef RemarkName,
1644 RemarkCallBack &&RemarkCB) const {
1645 if (!OREGetter)
1646 return;
1647
1648 Function *F = I->getFunction();
1649 auto &ORE = OREGetter.getValue()(F);
1650
1651 if (RemarkName.startswith("OMP"))
1652 ORE.emit([&]() {
1653 return RemarkCB(RemarkKind(PassName, RemarkName, I))
1654 << " [" << RemarkName << "]";
1655 });
1656 else
1657 ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
1658 }
1659
1660 /// Emit a remark on a function.
1661 template <typename RemarkKind, typename RemarkCallBack>
1662 void emitRemark(Function *F, StringRef RemarkName,
1663 RemarkCallBack &&RemarkCB) const {
1664 if (!OREGetter)
1665 return;
1666
1667 auto &ORE = OREGetter.getValue()(F);
1668
1669 if (RemarkName.startswith("OMP"))
1670 ORE.emit([&]() {
1671 return RemarkCB(RemarkKind(PassName, RemarkName, F))
1672 << " [" << RemarkName << "]";
1673 });
1674 else
1675 ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
1676 }
1677
1678 /// Helper struct used in the communication between an abstract attribute (AA)
1679 /// that wants to change the signature of a function and the Attributor which
1680 /// applies the changes. The struct is partially initialized with the
1681 /// information from the AA (see the constructor). All other members are
1682 /// provided by the Attributor prior to invoking any callbacks.
1683 struct ArgumentReplacementInfo {
1684 /// Callee repair callback type
1685 ///
1686 /// The function repair callback is invoked once to rewire the replacement
1687 /// arguments in the body of the new function. The argument replacement info
1688 /// is passed, as built from the registerFunctionSignatureRewrite call, as
1689 /// well as the replacement function and an iterator to the first
1690 /// replacement argument.
1691 using CalleeRepairCBTy = std::function<void(
1692 const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>;
1693
1694 /// Abstract call site (ACS) repair callback type
1695 ///
1696 /// The abstract call site repair callback is invoked once on every abstract
1697 /// call site of the replaced function (\see ReplacedFn). The callback needs
1698 /// to provide the operands for the call to the new replacement function.
1699 /// The number and type of the operands appended to the provided vector
1700 /// (second argument) is defined by the number and types determined through
1701 /// the replacement type vector (\see ReplacementTypes). The first argument
1702 /// is the ArgumentReplacementInfo object registered with the Attributor
1703 /// through the registerFunctionSignatureRewrite call.
1704 using ACSRepairCBTy =
1705 std::function<void(const ArgumentReplacementInfo &, AbstractCallSite,
1706 SmallVectorImpl<Value *> &)>;
1707
1708 /// Simple getters, see the corresponding members for details.
1709 ///{
1710
1711 Attributor &getAttributor() const { return A; }
1712 const Function &getReplacedFn() const { return ReplacedFn; }
1713 const Argument &getReplacedArg() const { return ReplacedArg; }
1714 unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); }
1715 const SmallVectorImpl<Type *> &getReplacementTypes() const {
1716 return ReplacementTypes;
1717 }
1718
1719 ///}
1720
1721 private:
1722 /// Constructor that takes the argument to be replaced, the types of
1723 /// the replacement arguments, as well as callbacks to repair the call sites
1724 /// and new function after the replacement happened.
1725 ArgumentReplacementInfo(Attributor &A, Argument &Arg,
1726 ArrayRef<Type *> ReplacementTypes,
1727 CalleeRepairCBTy &&CalleeRepairCB,
1728 ACSRepairCBTy &&ACSRepairCB)
1729 : A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg),
1730 ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()),
1731 CalleeRepairCB(std::move(CalleeRepairCB)),
1732 ACSRepairCB(std::move(ACSRepairCB)) {}
1733
1734 /// Reference to the attributor to allow access from the callbacks.
1735 Attributor &A;
1736
1737 /// The "old" function replaced by ReplacementFn.
1738 const Function &ReplacedFn;
1739
1740 /// The "old" argument replaced by new ones defined via ReplacementTypes.
1741 const Argument &ReplacedArg;
1742
1743 /// The types of the arguments replacing ReplacedArg.
1744 const SmallVector<Type *, 8> ReplacementTypes;
1745
1746 /// Callee repair callback, see CalleeRepairCBTy.
1747 const CalleeRepairCBTy CalleeRepairCB;
1748
1749 /// Abstract call site (ACS) repair callback, see ACSRepairCBTy.
1750 const ACSRepairCBTy ACSRepairCB;
1751
1752 /// Allow access to the private members from the Attributor.
1753 friend struct Attributor;
1754 };
1755
1756 /// Check if we can rewrite a function signature.
1757 ///
1758 /// The argument \p Arg is replaced with new ones defined by the number,
1759 /// order, and types in \p ReplacementTypes.
1760 ///
1761 /// \returns True, if the replacement can be registered, via
1762 /// registerFunctionSignatureRewrite, false otherwise.
1763 bool isValidFunctionSignatureRewrite(Argument &Arg,
1764 ArrayRef<Type *> ReplacementTypes);
1765
1766 /// Register a rewrite for a function signature.
1767 ///
1768 /// The argument \p Arg is replaced with new ones defined by the number,
1769 /// order, and types in \p ReplacementTypes. The rewiring at the call sites is
1770 /// done through \p ACSRepairCB and at the callee site through
1771 /// \p CalleeRepairCB.
1772 ///
1773 /// \returns True, if the replacement was registered, false otherwise.
1774 bool registerFunctionSignatureRewrite(
1775 Argument &Arg, ArrayRef<Type *> ReplacementTypes,
1776 ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
1777 ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB);
1778
1779 /// Check \p Pred on all function call sites.
1780 ///
1781 /// This method will evaluate \p Pred on call sites and return
1782 /// true if \p Pred holds in every call site. However, this is only possible
1783 /// if all call sites are known, hence the function has internal linkage.
1784 /// If true is returned, \p AllCallSitesKnown is set if all possible call
1785 /// sites of the function have been visited.
1786 bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
1787 const AbstractAttribute &QueryingAA,
1788 bool RequireAllCallSites, bool &AllCallSitesKnown);
1789
1790 /// Check \p Pred on all values potentially returned by \p F.
1791 ///
1792 /// This method will evaluate \p Pred on all values potentially returned by
1793 /// the function associated with \p QueryingAA. The returned values are
1794 /// matched with their respective return instructions. Returns true if \p Pred
1795 /// holds on all of them.
1796 bool checkForAllReturnedValuesAndReturnInsts(
1797 function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
1798 const AbstractAttribute &QueryingAA);
1799
1800 /// Check \p Pred on all values potentially returned by the function
1801 /// associated with \p QueryingAA.
1802 ///
1803 /// This is the context insensitive version of the method above.
1804 bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
1805 const AbstractAttribute &QueryingAA);
1806
1807 /// Check \p Pred on all instructions with an opcode present in \p Opcodes.
1808 ///
1809 /// This method will evaluate \p Pred on all instructions with an opcode
1810 /// present in \p Opcodes and return true if \p Pred holds on all of them.
1811 bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
1812 const AbstractAttribute &QueryingAA,
1813 const ArrayRef<unsigned> &Opcodes,
1814 bool &UsedAssumedInformation,
1815 bool CheckBBLivenessOnly = false,
1816 bool CheckPotentiallyDead = false);
1817
1818 /// Check \p Pred on all call-like instructions (=CallBase derived).
1819 ///
1820 /// See checkForAllInstructions(...) for more information.
1821 bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
1822 const AbstractAttribute &QueryingAA,
1823 bool &UsedAssumedInformation,
1824 bool CheckBBLivenessOnly = false,
1825 bool CheckPotentiallyDead = false) {
1826 return checkForAllInstructions(
1827 Pred, QueryingAA,
1828 {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
1829 (unsigned)Instruction::Call},
1830 UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead);
1831 }
1832
1833 /// Check \p Pred on all Read/Write instructions.
1834 ///
1835 /// This method will evaluate \p Pred on all instructions that read or write
1836 /// to memory present in the information cache and return true if \p Pred
1837 /// holds on all of them.
1838 bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred,
1839 AbstractAttribute &QueryingAA,
1840 bool &UsedAssumedInformation);
1841
1842 /// Create a shallow wrapper for \p F such that \p F has internal linkage
1843 /// afterwards. It also sets the original \p F 's name to anonymous
1844 ///
1845 /// A wrapper is a function with the same type (and attributes) as \p F
1846 /// that will only call \p F and return the result, if any.
1847 ///
1848 /// Assuming the declaration of \p F looks like:
1849 /// rty F(aty0 arg0, ..., atyN argN);
1850 ///
1851 /// The wrapper will then look as follows:
1852 /// rty wrapper(aty0 arg0, ..., atyN argN) {
1853 /// return F(arg0, ..., argN);
1854 /// }
1855 ///
1856 static void createShallowWrapper(Function &F);
1857
1858 /// Returns true if the function \p F can be internalized. i.e. it has a
1859 /// compatible linkage.
1860 static bool isInternalizable(Function &F);
1861
1862 /// Make another copy of the function \p F such that the copied version has
1863 /// internal linkage afterwards and can be analysed. Then we replace all uses
1864 /// of the original function to the copied one
1865 ///
1866 /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
1867 /// linkage can be internalized because these linkages guarantee that other
1868 /// definitions with the same name have the same semantics as this one.
1869 ///
1870 /// This will only be run if the `attributor-allow-deep-wrappers` option is
1871 /// set, or if the function is called with \p Force set to true.
1872 ///
1873 /// If the function \p F failed to be internalized the return value will be a
1874 /// null pointer.
1875 static Function *internalizeFunction(Function &F, bool Force = false);
1876
1877 /// Make copies of each function in the set \p FnSet such that the copied
1878 /// version has internal linkage afterwards and can be analysed. Then we
1879 /// replace all uses of the original function to the copied one. The map
1880 /// \p FnMap contains a mapping of functions to their internalized versions.
1881 ///
1882 /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
1883 /// linkage can be internalized because these linkages guarantee that other
1884 /// definitions with the same name have the same semantics as this one.
1885 ///
1886 /// This version will internalize all the functions in the set \p FnSet at
1887 /// once and then replace the uses. This prevents internalized functions being
1888 /// called by external functions when there is an internalized version in the
1889 /// module.
1890 static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
1891 DenseMap<Function *, Function *> &FnMap);
1892
1893 /// Return the data layout associated with the anchor scope.
1894 const DataLayout &getDataLayout() const { return InfoCache.DL; }
1895
1896 /// The allocator used to allocate memory, e.g. for `AbstractAttribute`s.
1897 BumpPtrAllocator &Allocator;
1898
1899private:
1900 /// This method will do fixpoint iteration until fixpoint or the
1901 /// maximum iteration count is reached.
1902 ///
1903 /// If the maximum iteration count is reached, this method will
1904 /// indicate pessimistic fixpoint on attributes that transitively depend
1905 /// on attributes that were scheduled for an update.
1906 void runTillFixpoint();
1907
1908 /// Gets called after scheduling, manifests attributes to the LLVM IR.
1909 ChangeStatus manifestAttributes();
1910
1911 /// Gets called after attributes have been manifested, cleans up the IR.
1912 /// Deletes dead functions, blocks and instructions.
1913 /// Rewrites function signatures and updates the call graph.
1914 ChangeStatus cleanupIR();
1915
1916 /// Identify internal functions that are effectively dead, thus not reachable
1917 /// from a live entry point. The functions are added to ToBeDeletedFunctions.
1918 void identifyDeadInternalFunctions();
1919
1920 /// Run `::update` on \p AA and track the dependences queried while doing so.
1921 /// Also adjust the state if we know further updates are not necessary.
1922 ChangeStatus updateAA(AbstractAttribute &AA);
1923
1924 /// Remember the dependences on the top of the dependence stack such that they
1925 /// may trigger further updates. (\see DependenceStack)
1926 void rememberDependences();
1927
1928 /// Check \p Pred on all call sites of \p Fn.
1929 ///
1930 /// This method will evaluate \p Pred on call sites and return
1931 /// true if \p Pred holds in every call site. However, this is only possible
1932 /// if all call sites are known, hence the function has internal linkage.
1933 /// If true is returned, \p AllCallSitesKnown is set if all possible call
1934 /// sites of the function have been visited.
1935 bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
1936 const Function &Fn, bool RequireAllCallSites,
1937 const AbstractAttribute *QueryingAA,
1938 bool &AllCallSitesKnown);
1939
1940 /// Determine if CallBase context in \p IRP should be propagated.
1941 bool shouldPropagateCallBaseContext(const IRPosition &IRP);
1942
1943 /// Apply all requested function signature rewrites
1944 /// (\see registerFunctionSignatureRewrite) and return Changed if the module
1945 /// was altered.
1946 ChangeStatus
1947 rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns);
1948
1949 /// Check if the Attribute \p AA should be seeded.
1950 /// See getOrCreateAAFor.
1951 bool shouldSeedAttribute(AbstractAttribute &AA);
1952
1953 /// A nested map to lookup abstract attributes based on the argument position
1954 /// on the outer level, and the addresses of the static member (AAType::ID) on
1955 /// the inner level.
1956 ///{
1957 using AAMapKeyTy = std::pair<const char *, IRPosition>;
1958 DenseMap<AAMapKeyTy, AbstractAttribute *> AAMap;
1959 ///}
1960
1961 /// Map to remember all requested signature changes (= argument replacements).
1962 DenseMap<Function *, SmallVector<std::unique_ptr<ArgumentReplacementInfo>, 8>>
1963 ArgumentReplacementMap;
1964
1965 /// The set of functions we are deriving attributes for.
1966 SetVector<Function *> &Functions;
1967
1968 /// The information cache that holds pre-processed (LLVM-IR) information.
1969 InformationCache &InfoCache;
1970
1971 /// Helper to update an underlying call graph.
1972 CallGraphUpdater &CGUpdater;
1973
1974 /// Abstract Attribute dependency graph
1975 AADepGraph DG;
1976
1977 /// Set of functions for which we modified the content such that it might
1978 /// impact the call graph.
1979 SmallPtrSet<Function *, 8> CGModifiedFunctions;
1980
1981 /// Information about a dependence. If FromAA is changed ToAA needs to be
1982 /// updated as well.
1983 struct DepInfo {
1984 const AbstractAttribute *FromAA;
1985 const AbstractAttribute *ToAA;
1986 DepClassTy DepClass;
1987 };
1988
1989 /// The dependence stack is used to track dependences during an
1990 /// `AbstractAttribute::update` call. As `AbstractAttribute::update` can be
1991 /// recursive we might have multiple vectors of dependences in here. The stack
1992 /// size, should be adjusted according to the expected recursion depth and the
1993 /// inner dependence vector size to the expected number of dependences per
1994 /// abstract attribute. Since the inner vectors are actually allocated on the
1995 /// stack we can be generous with their size.
1996 using DependenceVector = SmallVector<DepInfo, 8>;
1997 SmallVector<DependenceVector *, 16> DependenceStack;
1998
1999 /// If not null, a set limiting the attribute opportunities.
2000 const DenseSet<const char *> *Allowed;
2001
2002 /// Whether to delete functions.
2003 const bool DeleteFns;
2004
2005 /// Whether to rewrite signatures.
2006 const bool RewriteSignatures;
2007
2008 /// Maximum number of fixedpoint iterations.
2009 Optional<unsigned> MaxFixpointIterations;
2010
2011 /// A set to remember the functions we already assume to be live and visited.
2012 DenseSet<const Function *> VisitedFunctions;
2013
2014 /// Uses we replace with a new value after manifest is done. We will remove
2015 /// then trivially dead instructions as well.
2016 DenseMap<Use *, Value *> ToBeChangedUses;
2017
2018 /// Values we replace with a new value after manifest is done. We will remove
2019 /// then trivially dead instructions as well.
2020 DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues;
2021
2022 /// Instructions we replace with `unreachable` insts after manifest is done.
2023 SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;
2024
2025 /// Invoke instructions with at least a single dead successor block.
2026 SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;
2027
2028 /// A flag that indicates which stage of the process we are in. Initially, the
2029 /// phase is SEEDING. Phase is changed in `Attributor::run()`
2030 enum class AttributorPhase {
2031 SEEDING,
2032 UPDATE,
2033 MANIFEST,
2034 CLEANUP,
2035 } Phase = AttributorPhase::SEEDING;
2036
2037 /// The current initialization chain length. Tracked to avoid stack overflows.
2038 unsigned InitializationChainLength = 0;
2039
2040 /// Functions, blocks, and instructions we delete after manifest is done.
2041 ///
2042 ///{
2043 SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
2044 SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
2045 SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks;
2046 SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
2047 ///}
2048
2049 /// Callback to get an OptimizationRemarkEmitter from a Function *.
2050 Optional<OptimizationRemarkGetter> OREGetter;
2051
2052 /// The name of the pass to emit remarks for.
2053 const char *PassName = "";
2054
2055 friend AADepGraph;
2056 friend AttributorCallGraph;
2057};
2058
2059/// An interface to query the internal state of an abstract attribute.
2060///
2061/// The abstract state is a minimal interface that allows the Attributor to
2062/// communicate with the abstract attributes about their internal state without
2063/// enforcing or exposing implementation details, e.g., the (existence of an)
2064/// underlying lattice.
2065///
2066/// It is sufficient to be able to query if a state is (1) valid or invalid, (2)
2067/// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint
2068/// was reached or (4) a pessimistic fixpoint was enforced.
2069///
2070/// All methods need to be implemented by the subclass. For the common use case,
2071/// a single boolean state or a bit-encoded state, the BooleanState and
2072/// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract
2073/// attribute can inherit from them to get the abstract state interface and
2074/// additional methods to directly modify the state based if needed. See the
2075/// class comments for help.
2076struct AbstractState {
2077 virtual ~AbstractState() {}
2078
2079 /// Return if this abstract state is in a valid state. If false, no
2080 /// information provided should be used.
2081 virtual bool isValidState() const = 0;
2082
2083 /// Return if this abstract state is fixed, thus does not need to be updated
2084 /// if information changes as it cannot change itself.
2085 virtual bool isAtFixpoint() const = 0;
2086
2087 /// Indicate that the abstract state should converge to the optimistic state.
2088 ///
2089 /// This will usually make the optimistically assumed state the known to be
2090 /// true state.
2091 ///
2092 /// \returns ChangeStatus::UNCHANGED as the assumed value should not change.
2093 virtual ChangeStatus indicateOptimisticFixpoint() = 0;
2094
2095 /// Indicate that the abstract state should converge to the pessimistic state.
2096 ///
2097 /// This will usually revert the optimistically assumed state to the known to
2098 /// be true state.
2099 ///
2100 /// \returns ChangeStatus::CHANGED as the assumed value may change.
2101 virtual ChangeStatus indicatePessimisticFixpoint() = 0;
2102};
2103
2104/// Simple state with integers encoding.
2105///
2106/// The interface ensures that the assumed bits are always a subset of the known
2107/// bits. Users can only add known bits and, except through adding known bits,
2108 /// they can only remove assumed bits. This should guarantee monotonicity and
2109 /// thereby the existence of a fixpoint (if used correctly). The fixpoint is
2110 /// reached when the assumed and known state/bits are equal. Users can
2111 /// force/indicate a fixpoint. If an optimistic one is indicated, the known
2112/// state will catch up with the assumed one, for a pessimistic fixpoint it is
2113/// the other way around.
2114template <typename base_ty, base_ty BestState, base_ty WorstState>
2115struct IntegerStateBase : public AbstractState {
2116 using base_t = base_ty;
2117
2118 IntegerStateBase() {}
2119 IntegerStateBase(base_t Assumed) : Assumed(Assumed) {}
2120
2121 /// Return the best possible representable state.
2122 static constexpr base_t getBestState() { return BestState; }
2123 static constexpr base_t getBestState(const IntegerStateBase &) {
2124 return getBestState();
2125 }
2126
2127 /// Return the worst possible representable state.
2128 static constexpr base_t getWorstState() { return WorstState; }
2129 static constexpr base_t getWorstState(const IntegerStateBase &) {
2130 return getWorstState();
2131 }
2132
2133 /// See AbstractState::isValidState()
2134 /// NOTE: For now we simply pretend that the worst possible state is invalid.
2135 bool isValidState() const override { return Assumed != getWorstState(); }
4
Assuming the condition is false
5
Returning zero, which participates in a condition later
2136
2137 /// See AbstractState::isAtFixpoint()
2138 bool isAtFixpoint() const override { return Assumed == Known; }
2139
2140 /// See AbstractState::indicateOptimisticFixpoint(...)
2141 ChangeStatus indicateOptimisticFixpoint() override {
2142 Known = Assumed;
2143 return ChangeStatus::UNCHANGED;
2144 }
2145
2146 /// See AbstractState::indicatePessimisticFixpoint(...)
2147 ChangeStatus indicatePessimisticFixpoint() override {
2148 Assumed = Known;
2149 return ChangeStatus::CHANGED;
2150 }
2151
2152 /// Return the known state encoding
2153 base_t getKnown() const { return Known; }
2154
2155 /// Return the assumed state encoding.
2156 base_t getAssumed() const { return Assumed; }
2157
2158 /// Equality for IntegerStateBase.
2159 bool
2160 operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
2161 return this->getAssumed() == R.getAssumed() &&
2162 this->getKnown() == R.getKnown();
2163 }
2164
2165 /// Inequality for IntegerStateBase.
2166 bool
2167 operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
2168 return !(*this == R);
2169 }
2170
2171 /// "Clamp" this state with \p R. The result is subtype dependent but it is
2172 /// intended that only information assumed in both states will be assumed in
2173 /// this one afterwards.
2174 void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2175 handleNewAssumedValue(R.getAssumed());
2176 }
2177
2178 /// "Clamp" this state with \p R. The result is subtype dependent but it is
2179 /// intended that information known in either state will be known in
2180 /// this one afterwards.
2181 void operator+=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2182 handleNewKnownValue(R.getKnown());
2183 }
2184
2185 void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2186 joinOR(R.getAssumed(), R.getKnown());
2187 }
2188
2189 void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2190 joinAND(R.getAssumed(), R.getKnown());
2191 }
2192
2193protected:
2194 /// Handle a new assumed value \p Value. Subtype dependent.
2195 virtual void handleNewAssumedValue(base_t Value) = 0;
2196
2197 /// Handle a new known value \p Value. Subtype dependent.
2198 virtual void handleNewKnownValue(base_t Value) = 0;
2199
2200 /// Handle a value \p Value. Subtype dependent.
2201 virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0;
2202
2203 /// Handle a new assumed value \p Value. Subtype dependent.
2204 virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0;
2205
2206 /// The known state encoding in an integer of type base_t.
2207 base_t Known = getWorstState();
2208
2209 /// The assumed state encoding in an integer of type base_t.
2210 base_t Assumed = getBestState();
2211};
2212
2213/// Specialization of the integer state for a bit-wise encoding.
2214template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
2215 base_ty WorstState = 0>
2216struct BitIntegerState
2217 : public IntegerStateBase<base_ty, BestState, WorstState> {
2218 using base_t = base_ty;
2219
2220 /// Return true if the bits set in \p BitsEncoding are "known bits".
2221 bool isKnown(base_t BitsEncoding) const {
2222 return (this->Known & BitsEncoding) == BitsEncoding;
2223 }
2224
2225 /// Return true if the bits set in \p BitsEncoding are "assumed bits".
2226 bool isAssumed(base_t BitsEncoding) const {
2227 return (this->Assumed & BitsEncoding) == BitsEncoding;
2228 }
2229
2230 /// Add the bits in \p BitsEncoding to the "known bits".
2231 BitIntegerState &addKnownBits(base_t Bits) {
2232 // Make sure we never miss any "known bits".
2233 this->Assumed |= Bits;
2234 this->Known |= Bits;
2235 return *this;
2236 }
2237
2238 /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known.
2239 BitIntegerState &removeAssumedBits(base_t BitsEncoding) {
2240 return intersectAssumedBits(~BitsEncoding);
2241 }
2242
2243 /// Remove the bits in \p BitsEncoding from the "known bits".
2244 BitIntegerState &removeKnownBits(base_t BitsEncoding) {
2245 this->Known = (this->Known & ~BitsEncoding);
2246 return *this;
2247 }
2248
2249 /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
2250 BitIntegerState &intersectAssumedBits(base_t BitsEncoding) {
2251 // Make sure we never loose any "known bits".
2252 this->Assumed = (this->Assumed & BitsEncoding) | this->Known;
2253 return *this;
2254 }
2255
2256private:
2257 void handleNewAssumedValue(base_t Value) override {
2258 intersectAssumedBits(Value);
2259 }
2260 void handleNewKnownValue(base_t Value) override { addKnownBits(Value); }
2261 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2262 this->Known |= KnownValue;
2263 this->Assumed |= AssumedValue;
2264 }
2265 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2266 this->Known &= KnownValue;
2267 this->Assumed &= AssumedValue;
2268 }
2269};
2270
2271/// Specialization of the integer state for an increasing value, hence ~0u is
2272/// the best state and 0 the worst.
2273template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
2274 base_ty WorstState = 0>
2275struct IncIntegerState
2276 : public IntegerStateBase<base_ty, BestState, WorstState> {
2277 using super = IntegerStateBase<base_ty, BestState, WorstState>;
2278 using base_t = base_ty;
2279
2280 IncIntegerState() : super() {}
2281 IncIntegerState(base_t Assumed) : super(Assumed) {}
2282
2283 /// Return the best possible representable state.
2284 static constexpr base_t getBestState() { return BestState; }
2285 static constexpr base_t
2286 getBestState(const IncIntegerState<base_ty, BestState, WorstState> &) {
2287 return getBestState();
2288 }
2289
2290 /// Take minimum of assumed and \p Value.
2291 IncIntegerState &takeAssumedMinimum(base_t Value) {
2292 // Make sure we never loose "known value".
2293 this->Assumed = std::max(std::min(this->Assumed, Value), this->Known);
2294 return *this;
2295 }
2296
2297 /// Take maximum of known and \p Value.
2298 IncIntegerState &takeKnownMaximum(base_t Value) {
2299 // Make sure we never loose "known value".
2300 this->Assumed = std::max(Value, this->Assumed);
2301 this->Known = std::max(Value, this->Known);
2302 return *this;
2303 }
2304
2305private:
2306 void handleNewAssumedValue(base_t Value) override {
2307 takeAssumedMinimum(Value);
2308 }
2309 void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); }
2310 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2311 this->Known = std::max(this->Known, KnownValue);
2312 this->Assumed = std::max(this->Assumed, AssumedValue);
2313 }
2314 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2315 this->Known = std::min(this->Known, KnownValue);
2316 this->Assumed = std::min(this->Assumed, AssumedValue);
2317 }
2318};
2319
2320/// Specialization of the integer state for a decreasing value, hence 0 is the
2321/// best state and ~0u the worst.
2322template <typename base_ty = uint32_t>
2323struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> {
2324 using base_t = base_ty;
2325
2326 /// Take maximum of assumed and \p Value.
2327 DecIntegerState &takeAssumedMaximum(base_t Value) {
2328 // Make sure we never loose "known value".
2329 this->Assumed = std::min(std::max(this->Assumed, Value), this->Known);
2330 return *this;
2331 }
2332
2333 /// Take minimum of known and \p Value.
2334 DecIntegerState &takeKnownMinimum(base_t Value) {
2335 // Make sure we never loose "known value".
2336 this->Assumed = std::min(Value, this->Assumed);
2337 this->Known = std::min(Value, this->Known);
2338 return *this;
2339 }
2340
2341private:
2342 void handleNewAssumedValue(base_t Value) override {
2343 takeAssumedMaximum(Value);
2344 }
2345 void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); }
2346 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2347 this->Assumed = std::min(this->Assumed, KnownValue);
2348 this->Assumed = std::min(this->Assumed, AssumedValue);
2349 }
2350 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2351 this->Assumed = std::max(this->Assumed, KnownValue);
2352 this->Assumed = std::max(this->Assumed, AssumedValue);
2353 }
2354};
2355
2356/// Simple wrapper for a single bit (boolean) state.
2357struct BooleanState : public IntegerStateBase<bool, 1, 0> {
2358 using super = IntegerStateBase<bool, 1, 0>;
2359 using base_t = IntegerStateBase::base_t;
2360
2361 BooleanState() : super() {}
2362 BooleanState(base_t Assumed) : super(Assumed) {}
2363
2364 /// Set the assumed value to \p Value but never below the known one.
2365 void setAssumed(bool Value) { Assumed &= (Known | Value); }
2366
2367 /// Set the known and asssumed value to \p Value.
2368 void setKnown(bool Value) {
2369 Known |= Value;
2370 Assumed |= Value;
2371 }
2372
2373 /// Return true if the state is assumed to hold.
2374 bool isAssumed() const { return getAssumed(); }
2375
2376 /// Return true if the state is known to hold.
2377 bool isKnown() const { return getKnown(); }
2378
2379private:
2380 void handleNewAssumedValue(base_t Value) override {
2381 if (!Value)
2382 Assumed = Known;
2383 }
2384 void handleNewKnownValue(base_t Value) override {
2385 if (Value)
2386 Known = (Assumed = Value);
2387 }
2388 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2389 Known |= KnownValue;
2390 Assumed |= AssumedValue;
2391 }
2392 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2393 Known &= KnownValue;
2394 Assumed &= AssumedValue;
2395 }
2396};
2397
2398/// State for an integer range.
2399struct IntegerRangeState : public AbstractState {
2400
2401 /// Bitwidth of the associated value.
2402 uint32_t BitWidth;
2403
2404 /// State representing assumed range, initially set to empty.
2405 ConstantRange Assumed;
2406
2407 /// State representing known range, initially set to [-inf, inf].
2408 ConstantRange Known;
2409
2410 IntegerRangeState(uint32_t BitWidth)
2411 : BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)),
2412 Known(ConstantRange::getFull(BitWidth)) {}
2413
2414 IntegerRangeState(const ConstantRange &CR)
2415 : BitWidth(CR.getBitWidth()), Assumed(CR),
2416 Known(getWorstState(CR.getBitWidth())) {}
2417
2418 /// Return the worst possible representable state.
2419 static ConstantRange getWorstState(uint32_t BitWidth) {
2420 return ConstantRange::getFull(BitWidth);
2421 }
2422
2423 /// Return the best possible representable state.
2424 static ConstantRange getBestState(uint32_t BitWidth) {
2425 return ConstantRange::getEmpty(BitWidth);
2426 }
2427 static ConstantRange getBestState(const IntegerRangeState &IRS) {
2428 return getBestState(IRS.getBitWidth());
2429 }
2430
2431 /// Return associated values' bit width.
2432 uint32_t getBitWidth() const { return BitWidth; }
2433
2434 /// See AbstractState::isValidState()
2435 bool isValidState() const override {
2436 return BitWidth > 0 && !Assumed.isFullSet();
2437 }
2438
2439 /// See AbstractState::isAtFixpoint()
2440 bool isAtFixpoint() const override { return Assumed == Known; }
2441
2442 /// See AbstractState::indicateOptimisticFixpoint(...)
2443 ChangeStatus indicateOptimisticFixpoint() override {
2444 Known = Assumed;
2445 return ChangeStatus::CHANGED;
2446 }
2447
2448 /// See AbstractState::indicatePessimisticFixpoint(...)
2449 ChangeStatus indicatePessimisticFixpoint() override {
2450 Assumed = Known;
2451 return ChangeStatus::CHANGED;
2452 }
2453
2454 /// Return the known state encoding
2455 ConstantRange getKnown() const { return Known; }
2456
2457 /// Return the assumed state encoding.
2458 ConstantRange getAssumed() const { return Assumed; }
2459
2460 /// Unite assumed range with the passed state.
2461 void unionAssumed(const ConstantRange &R) {
2462 // Don't loose a known range.
2463 Assumed = Assumed.unionWith(R).intersectWith(Known);
2464 }
2465
2466 /// See IntegerRangeState::unionAssumed(..).
2467 void unionAssumed(const IntegerRangeState &R) {
2468 unionAssumed(R.getAssumed());
2469 }
2470
2471 /// Unite known range with the passed state.
2472 void unionKnown(const ConstantRange &R) {
2473 // Don't loose a known range.
2474 Known = Known.unionWith(R);
2475 Assumed = Assumed.unionWith(Known);
2476 }
2477
2478 /// See IntegerRangeState::unionKnown(..).
2479 void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); }
2480
2481 /// Intersect known range with the passed state.
2482 void intersectKnown(const ConstantRange &R) {
2483 Assumed = Assumed.intersectWith(R);
2484 Known = Known.intersectWith(R);
2485 }
2486
2487 /// See IntegerRangeState::intersectKnown(..).
2488 void intersectKnown(const IntegerRangeState &R) {
2489 intersectKnown(R.getKnown());
2490 }
2491
2492 /// Equality for IntegerRangeState.
2493 bool operator==(const IntegerRangeState &R) const {
2494 return getAssumed() == R.getAssumed() && getKnown() == R.getKnown();
2495 }
2496
2497 /// "Clamp" this state with \p R. The result is subtype dependent but it is
2498 /// intended that only information assumed in both states will be assumed in
2499 /// this one afterwards.
2500 IntegerRangeState operator^=(const IntegerRangeState &R) {
2501 // NOTE: `^=` operator seems like `intersect` but in this case, we need to
2502 // take `union`.
2503 unionAssumed(R);
2504 return *this;
2505 }
2506
2507 IntegerRangeState operator&=(const IntegerRangeState &R) {
2508 // NOTE: `&=` operator seems like `intersect` but in this case, we need to
2509 // take `union`.
2510 unionKnown(R);
2511 unionAssumed(R);
2512 return *this;
2513 }
2514};
2515
2516/// Simple state for a set.
2517///
2518/// This represents a state containing a set of values. The interface supports
2519/// modelling sets that contain all possible elements. The state's internal
2520/// value is modified using union or intersection operations.
2521template <typename BaseTy> struct SetState : public AbstractState {
2522 /// A wrapper around a set that has semantics for handling unions and
2523 /// intersections with a "universal" set that contains all elements.
2524 struct SetContents {
2525 /// Creates a universal set with no concrete elements or an empty set.
2526 SetContents(bool Universal) : Universal(Universal) {}
2527
2528 /// Creates a non-universal set with concrete values.
2529 SetContents(const DenseSet<BaseTy> &Assumptions)
2530 : Universal(false), Set(Assumptions) {}
2531
2532 SetContents(bool Universal, const DenseSet<BaseTy> &Assumptions)
2533 : Universal(Universal), Set(Assumptions) {}
2534
2535 const DenseSet<BaseTy> &getSet() const { return Set; }
2536
2537 bool isUniversal() const { return Universal; }
2538
2539 bool empty() const { return Set.empty() && !Universal; }
2540
2541 /// Finds A := A ^ B where A or B could be the "Universal" set which
2542 /// contains every possible attribute. Returns true if changes were made.
2543 bool getIntersection(const SetContents &RHS) {
2544 bool IsUniversal = Universal;
2545 unsigned Size = Set.size();
2546
2547 // A := A ^ U = A
2548 if (RHS.isUniversal())
2549 return false;
2550
2551 // A := U ^ B = B
2552 if (Universal)
2553 Set = RHS.getSet();
2554 else
2555 set_intersect(Set, RHS.getSet());
2556
2557 Universal &= RHS.isUniversal();
2558 return IsUniversal != Universal || Size != Set.size();
2559 }
2560
2561 /// Finds A := A u B where A or B could be the "Universal" set which
2562 /// contains every possible attribute. returns true if changes were made.
2563 bool getUnion(const SetContents &RHS) {
2564 bool IsUniversal = Universal;
2565 unsigned Size = Set.size();
2566
2567 // A := A u U = U = U u B
2568 if (!RHS.isUniversal() && !Universal)
2569 set_union(Set, RHS.getSet());
2570
2571 Universal |= RHS.isUniversal();
2572 return IsUniversal != Universal || Size != Set.size();
2573 }
2574
2575 private:
2576 /// Indicates if this set is "universal", containing every possible element.
2577 bool Universal;
2578
2579 /// The set of currently active assumptions.
2580 DenseSet<BaseTy> Set;
2581 };
2582
2583 SetState() : Known(false), Assumed(true), IsAtFixedpoint(false) {}
2584
2585 /// Initializes the known state with an initial set and initializes the
2586 /// assumed state as universal.
2587 SetState(const DenseSet<BaseTy> &Known)
2588 : Known(Known), Assumed(true), IsAtFixedpoint(false) {}
2589
2590 /// See AbstractState::isValidState()
2591 bool isValidState() const override { return !Assumed.empty(); }
2592
2593 /// See AbstractState::isAtFixpoint()
2594 bool isAtFixpoint() const override { return IsAtFixedpoint; }
2595
2596 /// See AbstractState::indicateOptimisticFixpoint(...)
2597 ChangeStatus indicateOptimisticFixpoint() override {
2598 IsAtFixedpoint = true;
2599 Known = Assumed;
2600 return ChangeStatus::UNCHANGED;
2601 }
2602
2603 /// See AbstractState::indicatePessimisticFixpoint(...)
2604 ChangeStatus indicatePessimisticFixpoint() override {
2605 IsAtFixedpoint = true;
2606 Assumed = Known;
2607 return ChangeStatus::CHANGED;
2608 }
2609
2610 /// Return the known state encoding.
2611 const SetContents &getKnown() const { return Known; }
2612
2613 /// Return the assumed state encoding.
2614 const SetContents &getAssumed() const { return Assumed; }
2615
2616 /// Returns if the set state contains the element.
2617 bool setContains(const BaseTy &Elem) const {
2618 return Assumed.getSet().contains(Elem) || Known.getSet().contains(Elem);
2619 }
2620
2621 /// Performs the set intersection between this set and \p RHS. Returns true if
2622 /// changes were made.
2623 bool getIntersection(const SetContents &RHS) {
2624 unsigned SizeBefore = Assumed.getSet().size();
2625
2626 // Get intersection and make sure that the known set is still a proper
2627 // subset of the assumed set. A := K u (A ^ R).
2628 Assumed.getIntersection(RHS);
2629 Assumed.getUnion(Known);
2630
2631 return SizeBefore != Assumed.getSet().size();
2632 }
2633
2634 /// Performs the set union between this set and \p RHS. Returns true if
2635 /// changes were made.
2636 bool getUnion(const SetContents &RHS) { return Assumed.getUnion(RHS); }
2637
2638private:
2639 /// The set of values known for this state.
2640 SetContents Known;
2641
2642 /// The set of assumed values for this state.
2643 SetContents Assumed;
2644
2645 bool IsAtFixedpoint;
2646};
2647
2648/// Helper struct that hosts the attribute manifestation routine outside of the
2649/// IRAttribute template. It is necessary as the modular build fails if the
/// virtual method IRAttribute::manifest is defined in Attributor.cpp.
2650struct IRAttributeManifest {
 /// Manifest the attributes in \p DeducedAttrs for the position \p IRP.
 /// IRAttribute::manifest forwards its IR position and its deduced attributes
 /// to this function and returns its result.
 /// NOTE(review): the exact effect of \p ForceReplace is not visible from this
 /// declaration; confirm against the out-of-line definition.
2651 static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
2652 const ArrayRef<Attribute> &DeducedAttrs,
2653 bool ForceReplace = false);
2654};
2655
2656/// Helper to tie a abstract state implementation to an abstract attribute.
2657template <typename StateTy, typename BaseType, class... Ts>
2658struct StateWrapper : public BaseType, public StateTy {
2659 /// Provide static access to the type of the state.
2660 using StateType = StateTy;
2661
2662 StateWrapper(const IRPosition &IRP, Ts... Args)
2663 : BaseType(IRP), StateTy(Args...) {}
2664
2665 /// See AbstractAttribute::getState(...).
2666 StateType &getState() override { return *this; }
2667
2668 /// See AbstractAttribute::getState(...).
2669 const StateType &getState() const override { return *this; }
2670};
2671
2672/// Helper class that provides common functionality to manifest IR attributes.
2673template <Attribute::AttrKind AK, typename BaseType>
2674struct IRAttribute : public BaseType {
2675 IRAttribute(const IRPosition &IRP) : BaseType(IRP) {}
2676
2677 /// See AbstractAttribute::initialize(...).
2678 virtual void initialize(Attributor &A) override {
2679 const IRPosition &IRP = this->getIRPosition();
2680 if (isa<UndefValue>(IRP.getAssociatedValue()) ||
2681 this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false,
2682 &A)) {
2683 this->getState().indicateOptimisticFixpoint();
2684 return;
2685 }
2686
2687 bool IsFnInterface = IRP.isFnInterfaceKind();
2688 const Function *FnScope = IRP.getAnchorScope();
2689 // TODO: Not all attributes require an exact definition. Find a way to
2690 // enable deduction for some but not all attributes in case the
2691 // definition might be changed at runtime, see also
2692 // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
2693 // TODO: We could always determine abstract attributes and if sufficient
2694 // information was found we could duplicate the functions that do not
2695 // have an exact definition.
2696 if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
2697 this->getState().indicatePessimisticFixpoint();
2698 }
2699
2700 /// See AbstractAttribute::manifest(...).
2701 ChangeStatus manifest(Attributor &A) override {
2702 if (isa<UndefValue>(this->getIRPosition().getAssociatedValue()))
2703 return ChangeStatus::UNCHANGED;
2704 SmallVector<Attribute, 4> DeducedAttrs;
2705 getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs);
2706 return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(),
2707 DeducedAttrs);
2708 }
2709
2710 /// Return the kind that identifies the abstract attribute implementation.
2711 Attribute::AttrKind getAttrKind() const { return AK; }
2712
2713 /// Return the deduced attributes in \p Attrs.
2714 virtual void getDeducedAttributes(LLVMContext &Ctx,
2715 SmallVectorImpl<Attribute> &Attrs) const {
2716 Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
2717 }
2718};
2719
2720/// Base struct for all "concrete attribute" deductions.
2721///
2722/// The abstract attribute is a minimal interface that allows the Attributor to
2723/// orchestrate the abstract/fixpoint analysis. The design allows to hide away
2724/// implementation choices made for the subclasses but also to structure their
2725/// implementation and simplify the use of other abstract attributes in-flight.
2726///
2727/// To allow easy creation of new attributes, most methods have default
2728/// implementations. The ones that do not are generally straightforward, except
2729/// `AbstractAttribute::updateImpl` which is the location of most reasoning
2730/// associated with the abstract attribute. The update is invoked by the
2731/// Attributor in case the situation used to justify the current optimistic
2732/// state might have changed. The Attributor determines this automatically
2733/// by monitoring the `Attributor::getAAFor` calls made by abstract attributes.
2734///
2735/// The `updateImpl` method should inspect the IR and other abstract attributes
2736/// in-flight to justify the best possible (=optimistic) state. The actual
2737/// implementation is, similar to the underlying abstract state encoding, not
2738/// exposed. In the most common case, the `updateImpl` will go through a list of
2739/// reasons why its optimistic state is valid given the current information. If
2740/// any combination of them holds and is sufficient to justify the current
2741/// optimistic state, the method shall return UNCHANGED. If not, the optimistic
2742/// state is adjusted to the situation and the method shall return CHANGED.
2743///
2744/// If the manifestation of the "concrete attribute" deduced by the subclass
2745/// differs from the "default" behavior, which is a (set of) LLVM-IR
2746/// attribute(s) for an argument, call site argument, function return value, or
2747/// function, the `AbstractAttribute::manifest` method should be overridden.
2748///
2749/// NOTE: If the state obtained via getState() is INVALID, thus if
2750/// AbstractAttribute::getState().isValidState() returns false, no
2751/// information provided by the methods of this class should be used.
2752/// NOTE: The Attributor currently has certain limitations to what we can do.
2753/// As a general rule of thumb, "concrete" abstract attributes should *for
2754/// now* only perform "backward" information propagation. That means
2755/// optimistic information obtained through abstract attributes should
2756/// only be used at positions that precede the origin of the information
2757/// with regards to the program flow. More practically, information can
2758/// *now* be propagated from instructions to their enclosing function, but
2759/// *not* from call sites to the called function. The mechanisms to allow
2760/// both directions will be added in the future.
2761/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
2762/// described in the file comment.
2763struct AbstractAttribute : public IRPosition, public AADepGraphNode {
2764 using StateType = AbstractState;
2765
 /// Create an abstract attribute for the position \p IRP; the position is
 /// stored in the IRPosition base of this object.
2766 AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
2767
2768 /// Virtual destructor.
2769 virtual ~AbstractAttribute() {}
2770
2771 /// This function is used to identify if a \p DGN is of type
2772 /// AbstractAttribute so that the dyn_cast and cast can use such information
2773 /// to cast an AADepGraphNode to an AbstractAttribute.
2774 ///
2775 /// We eagerly return true here because all AADepGraphNodes except for the
2776 /// synthetic node are of type AbstractAttribute. \p DGN is not inspected.
2777 static bool classof(const AADepGraphNode *DGN) { return true; }
2778
2779 /// Initialize the state with the information in the Attributor \p A.
2780 ///
2781 /// This function is called by the Attributor once all abstract attributes
2782 /// have been identified. It can and shall be used for tasks like:
2783 /// - identify existing knowledge in the IR and use it for the "known state"
2784 /// - perform any work that is not going to change over time, e.g., determine
2785 /// a subset of the IR, or attributes in-flight, that have to be looked at
2786 /// in the `updateImpl` method.
 /// The default implementation does nothing.
2787 virtual void initialize(Attributor &A) {}
2788
2789 /// Return the internal abstract state for inspection.
2790 virtual StateType &getState() = 0;
2791 virtual const StateType &getState() const = 0;
2792
2793 /// Return an IR position, see struct IRPosition. The position is this object
 /// itself, viewed through its IRPosition base.
2794 const IRPosition &getIRPosition() const { return *this; };
2795 IRPosition &getIRPosition() { return *this; };
2796
2797 /// Helper functions, for debug purposes only.
2798 ///{
2799 void print(raw_ostream &OS) const override;
2800 virtual void printWithDeps(raw_ostream &OS) const;
 /// Print this attribute to the debug stream, see dbgs().
2801 void dump() const { print(dbgs()); }
2802
2803 /// This function should return the "summarized" assumed state as string.
2804 virtual const std::string getAsStr() const = 0;
2805
2806 /// This function should return the name of the AbstractAttribute
2807 virtual const std::string getName() const = 0;
2808
2809 /// This function should return the address of the ID of the AbstractAttribute
2810 virtual const char *getIdAddr() const = 0;
2811 ///}
2812
2813 /// Allow the Attributor access to the protected methods.
2814 friend struct Attributor;
2815
2816protected:
2817 /// Hook for the Attributor to trigger an update of the internal state.
2818 ///
2819 /// If this attribute is already fixed, this method will return UNCHANGED,
2820 /// otherwise it delegates to `AbstractAttribute::updateImpl`.
2821 ///
2822 /// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
2823 ChangeStatus update(Attributor &A);
2824
2825 /// Hook for the Attributor to trigger the manifestation of the information
2826 /// represented by the abstract attribute in the LLVM-IR.
2827 ///
 /// The default implementation does not touch the IR.
 ///
2828 /// \Return CHANGED if the IR was altered, otherwise UNCHANGED.
2829 virtual ChangeStatus manifest(Attributor &A) {
2830 return ChangeStatus::UNCHANGED;
2831 }
2832
2833 /// Hook to enable custom statistic tracking, called after manifest that
2834 /// resulted in a change if statistics are enabled.
2835 ///
2836 /// We require subclasses to provide an implementation so we remember to
2837 /// add statistics for them.
2838 virtual void trackStatistics() const = 0;
2839
2840 /// The actual update/transfer function which has to be implemented by the
2841 /// derived classes.
2842 ///
2843 /// If it is called, the environment has changed and we have to determine if
2844 /// the current information is still valid or adjust it otherwise.
2845 ///
2846 /// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
2847 virtual ChangeStatus updateImpl(Attributor &A) = 0;
2848};
2849
2850/// Forward declarations of output streams for debug purposes.
2851///
2852///{
2853raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
2854raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
2855raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind);
2856raw_ostream &operator<<(raw_ostream &OS, const IRPosition &);
2857raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
2858template <typename base_ty, base_ty BestState, base_ty WorstState>
2859raw_ostream &
2860operator<<(raw_ostream &OS,
2861 const IntegerStateBase<base_ty, BestState, WorstState> &S) {
2862 return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
2863 << static_cast<const AbstractState &>(S);
2864}
2865raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State);
2866///}
2867
/// Pass wrapper for the Attributor whose run() method is handed a whole
/// Module \p M together with the module analysis manager \p AM. The definition
/// of run() is not part of this header.
2868struct AttributorPass : public PassInfoMixin<AttributorPass> {
2869 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
2870};
/// Pass wrapper for the Attributor whose run() method is handed one call graph
/// SCC \p C at a time, along with the CGSCC analysis manager \p AM, the lazy
/// call graph \p CG, and the update result \p UR. The definition of run() is
/// not part of this header.
2871struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
2872 PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
2873 LazyCallGraph &CG, CGSCCUpdateResult &UR);
2874};
2875
2876Pass *createAttributorLegacyPass();
2877Pass *createAttributorCGSCCLegacyPass();
2878
2879/// Helper function to clamp a state \p S of type \p StateType with the
2880/// information in \p R and indicate/return if \p S did change (as-in update is
2881/// required to be run again).
2882template <typename StateType>
2883ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
2884 auto Assumed = S.getAssumed();
2885 S ^= R;
2886 return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
2887 : ChangeStatus::CHANGED;
2888}
2889
2890/// ----------------------------------------------------------------------------
2891/// Abstract Attribute Classes
2892/// ----------------------------------------------------------------------------
2893
2894/// An abstract attribute for the returned values of a function. It is tied to
/// the `returned` IR attribute kind (Attribute::Returned).
2895struct AAReturnedValues
2896 : public IRAttribute<Attribute::Returned, AbstractAttribute> {
 /// Create the attribute for the position \p IRP; \p A is not used here.
2897 AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2898
2899 /// Return an assumed unique return value if a single candidate is found. If
2900 /// there cannot be one, return a nullptr. If it is not clear yet, return the
2901 /// Optional::NoneType.
2902 Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
2903
2904 /// Check \p Pred on all returned values.
2905 ///
2906 /// This method will evaluate \p Pred on returned values and return
2907 /// true if (1) all returned values are known, and (2) \p Pred returned true
2908 /// for all returned values.
2909 ///
2910 /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts
2911 /// method, this one will not filter dead return instructions.
2912 virtual bool checkForAllReturnedValuesAndReturnInsts(
2913 function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
2914 const = 0;
2915
 /// Iterators over a map from a value to a set of return instructions.
2916 using iterator =
2917 MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator;
2918 using const_iterator =
2919 MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator;
 /// Return a range over the (value, return instructions) entries.
2920 virtual llvm::iterator_range<iterator> returned_values() = 0;
2921 virtual llvm::iterator_range<const_iterator> returned_values() const = 0;
2922
 /// Return the number of return values.
 /// NOTE(review): presumably the number of entries in returned_values();
 /// confirm against the implementation.
2923 virtual size_t getNumReturnValues() const = 0;
2924
2925 /// Create an abstract attribute view for the position \p IRP.
2926 static AAReturnedValues &createForPosition(const IRPosition &IRP,
2927 Attributor &A);
2928
2929 /// See AbstractAttribute::getName()
2930 const std::string getName() const override { return "AAReturnedValues"; }
2931
2932 /// See AbstractAttribute::getIdAddr()
2933 const char *getIdAddr() const override { return &ID; }
2934
2935 /// This function should return true if the type of the \p AA is
2936 /// AAReturnedValues. The check compares the ID address reported by \p AA
 /// with the address of this class' ID.
2937 static bool classof(const AbstractAttribute *AA) {
2938 return (AA->getIdAddr() == &ID);
2939 }
2940
2941 /// Unique ID (due to the unique address)
2942 static const char ID;
2943};
2944
2945struct AANoUnwind
2946 : public IRAttribute<Attribute::NoUnwind,
2947 StateWrapper<BooleanState, AbstractAttribute>> {
2948 AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2949
2950 /// Returns true if nounwind is assumed.
2951 bool isAssumedNoUnwind() const { return getAssumed(); }
2952
2953 /// Returns true if nounwind is known.
2954 bool isKnownNoUnwind() const { return getKnown(); }
2955
2956 /// Create an abstract attribute view for the position \p IRP.
2957 static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A);
2958
2959 /// See AbstractAttribute::getName()
2960 const std::string getName() const override { return "AANoUnwind"; }
2961
2962 /// See AbstractAttribute::getIdAddr()
2963 const char *getIdAddr() const override { return &ID; }
2964
2965 /// This function should return true if the type of the \p AA is AANoUnwind
2966 static bool classof(const AbstractAttribute *AA) {
2967 return (AA->getIdAddr() == &ID);
2968 }
2969
2970 /// Unique ID (due to the unique address)
2971 static const char ID;
2972};
2973
/// An abstract interface for the nosync attribute (Attribute::NoSync),
/// tracked with a BooleanState.
2974struct AANoSync
2975 : public IRAttribute<Attribute::NoSync,
2976 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
2977 AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2978
2979 /// Return true if "nosync" is assumed (forwards to getAssumed()).
2980 bool isAssumedNoSync() const { return getAssumed(); }
2981
2982 /// Return true if "nosync" is known (forwards to getKnown()).
2983 bool isKnownNoSync() const { return getKnown(); }
2984
2985 /// Create an abstract attribute view for the position \p IRP.
2986 static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);
2987
2988 /// See AbstractAttribute::getName()
2989 const std::string getName() const override { return "AANoSync"; }
2990
2991 /// See AbstractAttribute::getIdAddr()
2992 const char *getIdAddr() const override { return &ID; }
2993
2994 /// Return true if \p AA is an AANoSync, i.e., its getIdAddr() equals &ID.
2995 static bool classof(const AbstractAttribute *AA) {
2996 return (AA->getIdAddr() == &ID);
2997 }
2998
2999 /// Unique ID; only its address is used for identification (see classof).
3000 static const char ID;
3001};
3002
3003/// An abstract interface for all nonnull attributes (Attribute::NonNull),
/// tracked with a BooleanState.
3004struct AANonNull
3005 : public IRAttribute<Attribute::NonNull,
3006 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3007 AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3008
3009 /// Return true if we assume that the underlying value is nonnull.
3010 bool isAssumedNonNull() const { return getAssumed(); }
3011
3012 /// Return true if we know that the underlying value is nonnull.
3013 bool isKnownNonNull() const { return getKnown(); }
3014
3015 /// Create an abstract attribute view for the position \p IRP.
3016 static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A);
3017
3018 /// See AbstractAttribute::getName()
3019 const std::string getName() const override { return "AANonNull"; }
3020
3021 /// See AbstractAttribute::getIdAddr()
3022 const char *getIdAddr() const override { return &ID; }
3023
3024 /// Return true if \p AA is an AANonNull, i.e., its getIdAddr() equals &ID.
3025 static bool classof(const AbstractAttribute *AA) {
3026 return (AA->getIdAddr() == &ID);
3027 }
3028
3029 /// Unique ID; only its address is used for identification (see classof).
3030 static const char ID;
3031};
3032
3033/// An abstract attribute for norecurse (Attribute::NoRecurse), tracked with a
/// BooleanState.
3034struct AANoRecurse
3035 : public IRAttribute<Attribute::NoRecurse,
3036 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3037 AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3038
3039 /// Return true if "norecurse" is assumed (forwards to getAssumed()).
3040 bool isAssumedNoRecurse() const { return getAssumed(); }
3041
3042 /// Return true if "norecurse" is known (forwards to getKnown()).
3043 bool isKnownNoRecurse() const { return getKnown(); }
3044
3045 /// Create an abstract attribute view for the position \p IRP.
3046 static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A);
3047
3048 /// See AbstractAttribute::getName()
3049 const std::string getName() const override { return "AANoRecurse"; }
3050
3051 /// See AbstractAttribute::getIdAddr()
3052 const char *getIdAddr() const override { return &ID; }
3053
3054 /// Return true if \p AA is an AANoRecurse, i.e., its getIdAddr() equals &ID.
3055 static bool classof(const AbstractAttribute *AA) {
3056 return (AA->getIdAddr() == &ID);
3057 }
3058
3059 /// Unique ID; only its address is used for identification (see classof).
3060 static const char ID;
3061};
3062
3063/// An abstract attribute for willreturn (Attribute::WillReturn), tracked with
/// a BooleanState.
3064struct AAWillReturn
3065 : public IRAttribute<Attribute::WillReturn,
3066 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3067 AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3068
3069 /// Return true if "willreturn" is assumed (forwards to getAssumed()).
3070 bool isAssumedWillReturn() const { return getAssumed(); }
3071
3072 /// Return true if "willreturn" is known (forwards to getKnown()).
3073 bool isKnownWillReturn() const { return getKnown(); }
3074
3075 /// Create an abstract attribute view for the position \p IRP.
3076 static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A);
3077
3078 /// See AbstractAttribute::getName()
3079 const std::string getName() const override { return "AAWillReturn"; }
3080
3081 /// See AbstractAttribute::getIdAddr()
3082 const char *getIdAddr() const override { return &ID; }
3083
3084 /// Return true if \p AA is an AAWillReturn, i.e., its getIdAddr() equals &ID.
3085 static bool classof(const AbstractAttribute *AA) {
3086 return (AA->getIdAddr() == &ID);
3087 }
3088
3089 /// Unique ID; only its address is used for identification (see classof).
3090 static const char ID;
3091};
3092
3093/// An abstract attribute for undefined behavior, tracked with a BooleanState.
/// The per-instruction queries are pure virtual and must be provided by
/// implementations.
3094struct AAUndefinedBehavior
3095 : public StateWrapper<BooleanState, AbstractAttribute> {
3096 using Base = StateWrapper<BooleanState, AbstractAttribute>;
 /// Construct the attribute for position \p IRP. \p A is not used here.
3097 AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3098
3099 /// Return true if "undefined behavior" is assumed.
3100 bool isAssumedToCauseUB() const { return getAssumed(); }
3101
3102 /// Return true if "undefined behavior" is assumed for the specific
 /// instruction \p I.
3103 virtual bool isAssumedToCauseUB(Instruction *I) const = 0;
3104
3105 /// Return true if "undefined behavior" is known.
3106 bool isKnownToCauseUB() const { return getKnown(); }
3107
3108 /// Return true if "undefined behavior" is known for the specific
 /// instruction \p I.
3109 virtual bool isKnownToCauseUB(Instruction *I) const = 0;
3110
3111 /// Create an abstract attribute view for the position \p IRP.
3112 static AAUndefinedBehavior &createForPosition(const IRPosition &IRP,
3113 Attributor &A);
3114
3115 /// See AbstractAttribute::getName()
3116 const std::string getName() const override { return "AAUndefinedBehavior"; }
3117
3118 /// See AbstractAttribute::getIdAddr()
3119 const char *getIdAddr() const override { return &ID; }
3120
3121 /// Return true if \p AA is an AAUndefinedBehavior, i.e., its getIdAddr()
3122 /// equals &ID.
3123 static bool classof(const AbstractAttribute *AA) {
3124 return (AA->getIdAddr() == &ID);
3125 }
3126
3127 /// Unique ID; only its address is used for identification (see classof).
3128 static const char ID;
3129};
3130
3131/// An abstract interface to determine reachability of point A to B.
3132struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> {
3133 using Base = StateWrapper<BooleanState, AbstractAttribute>;
 /// Construct the attribute for position \p IRP. \p A is not used here.
3134 AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3135
3136 /// Return true if the \p From instruction is assumed to reach the \p To
3137 /// instruction. If this attribute's state is invalid, conservatively answer
3138 /// true; otherwise the query is forwarded to the information cache of \p A.
3139 bool isAssumedReachable(Attributor &A, const Instruction &From,
3140 const Instruction &To) const {
3141 if (!getState().isValidState())
3142 return true;
3143 return A.getInfoCache().getPotentiallyReachable(From, To);
3144 }
3145
3146 /// Return true if the \p From instruction is known to reach the \p To
3147 /// instruction. If this attribute's state is invalid, answer false;
3148 /// otherwise the query is forwarded to the information cache of \p A.
 /// NOTE(review): this uses the same "potentially reachable" query as
 /// isAssumedReachable, so a true result may not be a "known" fact -- confirm
 /// the intended semantics.
3149 bool isKnownReachable(Attributor &A, const Instruction &From,
3150 const Instruction &To) const {
3151 if (!getState().isValidState())
3152 return false;
3153 return A.getInfoCache().getPotentiallyReachable(From, To);
3154 }
3155
3156 /// Create an abstract attribute view for the position \p IRP.
3157 static AAReachability &createForPosition(const IRPosition &IRP,
3158 Attributor &A);
3159
3160 /// See AbstractAttribute::getName()
3161 const std::string getName() const override { return "AAReachability"; }
3162
3163 /// See AbstractAttribute::getIdAddr()
3164 const char *getIdAddr() const override { return &ID; }
3165
3166 /// Return true if \p AA is an AAReachability, i.e., its getIdAddr() equals
3167 /// &ID.
3168 static bool classof(const AbstractAttribute *AA) {
3169 return (AA->getIdAddr() == &ID);
3170 }
3171
3172 /// Unique ID; only its address is used for identification (see classof).
3173 static const char ID;
3174};
3175
3176/// An abstract interface for all noalias attributes (Attribute::NoAlias),
/// tracked with a BooleanState.
3177struct AANoAlias
3178 : public IRAttribute<Attribute::NoAlias,
3179 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3180 AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3181
3182 /// Return true if we assume that the underlying value is noalias.
3183 bool isAssumedNoAlias() const { return getAssumed(); }
3184
3185 /// Return true if we know that the underlying value is noalias.
3186 bool isKnownNoAlias() const { return getKnown(); }
3187
3188 /// Create an abstract attribute view for the position \p IRP.
3189 static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A);
3190
3191 /// See AbstractAttribute::getName()
3192 const std::string getName() const override { return "AANoAlias"; }
3193
3194 /// See AbstractAttribute::getIdAddr()
3195 const char *getIdAddr() const override { return &ID; }
3196
3197 /// Return true if \p AA is an AANoAlias, i.e., its getIdAddr() equals &ID.
3198 static bool classof(const AbstractAttribute *AA) {
3199 return (AA->getIdAddr() == &ID);
3200 }
3201
3202 /// Unique ID; only its address is used for identification (see classof).
3203 static const char ID;
3204};
3205
3206/// An AbstractAttribute for nofree (Attribute::NoFree), tracked with a
/// BooleanState.
3207struct AANoFree
3208 : public IRAttribute<Attribute::NoFree,
3209 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3210 AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3211
3212 /// Return true if "nofree" is assumed (forwards to getAssumed()).
3213 bool isAssumedNoFree() const { return getAssumed(); }
3214
3215 /// Return true if "nofree" is known (forwards to getKnown()).
3216 bool isKnownNoFree() const { return getKnown(); }
3217
3218 /// Create an abstract attribute view for the position \p IRP.
3219 static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A);
3220
3221 /// See AbstractAttribute::getName()
3222 const std::string getName() const override { return "AANoFree"; }
3223
3224 /// See AbstractAttribute::getIdAddr()
3225 const char *getIdAddr() const override { return &ID; }
3226
3227 /// Return true if \p AA is an AANoFree, i.e., its getIdAddr() equals &ID.
3228 static bool classof(const AbstractAttribute *AA) {
3229 return (AA->getIdAddr() == &ID);
3230 }
3231
3232 /// Unique ID; only its address is used for identification (see classof).
3233 static const char ID;
3234};
3235
3236/// An AbstractAttribute for noreturn (Attribute::NoReturn), tracked with a
/// BooleanState.
3237struct AANoReturn
3238 : public IRAttribute<Attribute::NoReturn,
3239 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3240 AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3241
3242 /// Return true if the underlying object is assumed to never return.
3243 bool isAssumedNoReturn() const { return getAssumed(); }
3244
3245 /// Return true if the underlying object is known to never return.
3246 bool isKnownNoReturn() const { return getKnown(); }
3247
3248 /// Create an abstract attribute view for the position \p IRP.
3249 static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A);
3250
3251 /// See AbstractAttribute::getName()
3252 const std::string getName() const override { return "AANoReturn"; }
3253
3254 /// See AbstractAttribute::getIdAddr()
3255 const char *getIdAddr() const override { return &ID; }
3256
3257 /// Return true if \p AA is an AANoReturn, i.e., its getIdAddr() equals &ID.
3258 static bool classof(const AbstractAttribute *AA) {
3259 return (AA->getIdAddr() == &ID);
3260 }
3261
3262 /// Unique ID; only its address is used for identification (see classof).
3263 static const char ID;
3264};
3265
3266/// An abstract interface for liveness abstract attribute.
/// The state is a two-bit BitIntegerState (see the enum below); the liveness
/// queries are protected and pure virtual.
3267struct AAIsDead
3268 : public StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute> {
3269 using Base = StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute>;
 /// Construct the attribute for position \p IRP. \p A is not used here.
3270 AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3271
3272 /// State encoding bits. A set bit in the state means the property holds.
3273 enum {
3274 HAS_NO_EFFECT = 1 << 0,
3275 IS_REMOVABLE = 1 << 1,
3276
3277 IS_DEAD = HAS_NO_EFFECT | IS_REMOVABLE,
3278 };
 // Compile-time check that IS_DEAD matches the best state of the wrapped
 // BitIntegerState.
3279 static_assert(IS_DEAD == getBestState(), "Unexpected BEST_STATE value");
3280
3281protected:
3282 /// The query functions are protected such that other attributes need to go
3283 /// through the Attributor interfaces: `Attributor::isAssumedDead(...)`
3284
3285 /// Returns true if the underlying value is assumed dead.
3286 virtual bool isAssumedDead() const = 0;
3287
3288 /// Returns true if the underlying value is known dead.
3289 virtual bool isKnownDead() const = 0;
3290
3291 /// Returns true if \p BB is assumed dead.
3292 virtual bool isAssumedDead(const BasicBlock *BB) const = 0;
3293
3294 /// Returns true if \p BB is known dead.
3295 virtual bool isKnownDead(const BasicBlock *BB) const = 0;
3296
3297 /// Returns true if \p I is assumed dead.
3298 virtual bool isAssumedDead(const Instruction *I) const = 0;
3299
3300 /// Returns true if \p I is known dead.
3301 virtual bool isKnownDead(const Instruction *I) const = 0;
3302
3303 /// Check if at least one instruction in the range [\p begin, \p end) is
3304 /// live, i.e., not assumed dead. Returns true at the first such instruction
 /// and false if there is none (including for an empty range). Every
 /// instruction is asserted to be in the function associated with this
 /// attribute's IR position.
3305 template <typename T> bool isLiveInstSet(T begin, T end) const {
3306 for (const auto &I : llvm::make_range(begin, end)) {
3307 assert(I->getFunction() == getIRPosition().getAssociatedFunction() &&(static_cast <bool> (I->getFunction() == getIRPosition
().getAssociatedFunction() && "Instruction must be in the same anchor scope function."
) ? void (0) : __assert_fail ("I->getFunction() == getIRPosition().getAssociatedFunction() && \"Instruction must be in the same anchor scope function.\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 3308, __extension__ __PRETTY_FUNCTION__))
3308 "Instruction must be in the same anchor scope function.")(static_cast <bool> (I->getFunction() == getIRPosition
().getAssociatedFunction() && "Instruction must be in the same anchor scope function."
) ? void (0) : __assert_fail ("I->getFunction() == getIRPosition().getAssociatedFunction() && \"Instruction must be in the same anchor scope function.\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 3308, __extension__ __PRETTY_FUNCTION__))
;
3309
3310 if (!isAssumedDead(I))
3311 return true;
3312 }
3313
3314 return false;
3315 }
3316
3317public:
3318 /// Create an abstract attribute view for the position \p IRP.
3319 static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A);
3320
3321 /// Determine if \p F might catch asynchronous exceptions: true if \p F has
 /// a personality function and canSimplifyInvokeNoUnwind(&F) is false.
3322 static bool mayCatchAsynchronousExceptions(const Function &F) {
3323 return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
3324 }
3325
3326 /// Return if the edge from \p From BB to \p To BB is assumed dead.
3327 /// This is specifically useful in AAReachability.
 /// This default implementation always returns false.
3328 virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const {
3329 return false;
3330 }
3331
3332 /// See AbstractAttribute::getName()
3333 const std::string getName() const override { return "AAIsDead"; }
3334
3335 /// See AbstractAttribute::getIdAddr()
3336 const char *getIdAddr() const override { return &ID; }
3337
3338 /// Return true if \p AA is an AAIsDead, i.e., its getIdAddr() equals &ID.
3339 static bool classof(const AbstractAttribute *AA) {
3340 return (AA->getIdAddr() == &ID);
3341 }
3342
3343 /// Unique ID; only its address is used for identification (see classof).
3344 static const char ID;
3345
 // The Attributor is a friend so it can call the protected queries above.
3346 friend struct Attributor;
3347};
3348
3349/// State for the dereferenceable attribute. Combines a dereferenceable-bytes
/// state, a "globally dereferenceable" boolean state, and a map of accessed
/// byte ranges used to raise the known dereferenceable bytes.
3350struct DerefState : AbstractState {
3351
 /// Return the best possible representable state (a default-constructed
 /// DerefState).
3352 static DerefState getBestState() { return DerefState(); }
3353 static DerefState getBestState(const DerefState &) { return getBestState(); }
3354
3355 /// Return the worst possible representable state, i.e., a default state on
 /// which a pessimistic fixpoint was indicated.
3356 static DerefState getWorstState() {
3357 DerefState DS;
3358 DS.indicatePessimisticFixpoint();
3359 return DS;
3360 }
3361 static DerefState getWorstState(const DerefState &) {
3362 return getWorstState();
3363 }
3364
3365 /// State representing the dereferenceable bytes.
3366 IncIntegerState<> DerefBytesState;
3367
3368 /// Map of accessed memory offsets and sizes.
3369 /// A key is an offset and a value is a size.
3370 /// If there is a load/store instruction like
3371 /// p[offset] = v;
3372 /// (offset, sizeof(v)) will be inserted into this map.
3373 /// std::map is used because we want to iterate keys in ascending order.
3374 std::map<int64_t, uint64_t> AccessedBytesMap;
3375
3376 /// Helper function to calculate dereferenceable bytes from current known
3377 /// bytes and accessed bytes.
3378 ///
 /// ```
3379 /// int f(int *A){
3380 /// *A = 0;
3381 /// *(A+2) = 2;
3382 /// *(A+1) = 1;
3383 /// *(A+10) = 10;
3384 /// }
3385 /// ```
3386 /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`.
3387 /// AccessedBytesMap is a std::map, so it is iterated in ascending order of
3388 /// key (Offset). So KnownBytes will be updated like this:
3389 ///
3390 /// |Access | KnownBytes
3391 /// |(0, 4)| 0 -> 4
3392 /// |(4, 4)| 4 -> 8
3393 /// |(8, 4)| 8 -> 12
3394 /// |(40, 4) | 12 (break)
3395 void computeKnownDerefBytesFromAccessedMap() {
3396 int64_t KnownBytes = DerefBytesState.getKnown();
3397 for (auto &Access : AccessedBytesMap) {
 // Stop at the first access that starts beyond the bytes known so far;
 // there is a gap, so later accesses cannot extend the known range.
3398 if (KnownBytes < Access.first)
3399 break;
3400 KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second);
3401 }
3402
3403 DerefBytesState.takeKnownMaximum(KnownBytes);
3404 }
3405
3406 /// State representing whether the value is globally dereferenceable.
3407 BooleanState GlobalState;
3408
3409 /// See AbstractState::isValidState()
 /// Validity is determined by DerefBytesState alone.
3410 bool isValidState() const override { return DerefBytesState.isValidState(); }
3411
3412 /// See AbstractState::isAtFixpoint()
 /// An invalid state counts as a fixpoint; otherwise both sub-states must be
 /// at a fixpoint.
3413 bool isAtFixpoint() const override {
3414 return !isValidState() ||
3415 (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint());
3416 }
3417
3418 /// See AbstractState::indicateOptimisticFixpoint(...)
 /// Applies to both sub-states and always reports UNCHANGED.
3419 ChangeStatus indicateOptimisticFixpoint() override {
3420 DerefBytesState.indicateOptimisticFixpoint();
3421 GlobalState.indicateOptimisticFixpoint();
3422 return ChangeStatus::UNCHANGED;
3423 }
3424
3425 /// See AbstractState::indicatePessimisticFixpoint(...)
 /// Applies to both sub-states and always reports CHANGED.
3426 ChangeStatus indicatePessimisticFixpoint() override {
3427 DerefBytesState.indicatePessimisticFixpoint();
3428 GlobalState.indicatePessimisticFixpoint();
3429 return ChangeStatus::CHANGED;
3430 }
3431
3432 /// Update known dereferenceable bytes with \p Bytes.
3433 void takeKnownDerefBytesMaximum(uint64_t Bytes) {
3434 DerefBytesState.takeKnownMaximum(Bytes);
3435
3436 // Known bytes might increase.
3437 computeKnownDerefBytesFromAccessedMap();
3438 }
3439
3440 /// Update assumed dereferenceable bytes with \p Bytes.
3441 void takeAssumedDerefBytesMinimum(uint64_t Bytes) {
3442 DerefBytesState.takeAssumedMinimum(Bytes);
3443 }
3444
3445 /// Add accessed bytes to the map. If \p Offset is already present, the
 /// larger of the recorded size and \p Size is kept.
3446 void addAccessedBytes(int64_t Offset, uint64_t Size) {
3447 uint64_t &AccessedBytes = AccessedBytesMap[Offset];
3448 AccessedBytes = std::max(AccessedBytes, Size);
3449
3450 // Known bytes might increase.
3451 computeKnownDerefBytesFromAccessedMap();
3452 }
3453
3454 /// Equality for DerefState. Compares DerefBytesState and GlobalState only;
 /// AccessedBytesMap is not part of the comparison.
3455 bool operator==(const DerefState &R) const {
3456 return this->DerefBytesState == R.DerefBytesState &&
3457 this->GlobalState == R.GlobalState;
3458 }
3459
3460 /// Inequality for DerefState.
3461 bool operator!=(const DerefState &R) const { return !(*this == R); }
3462
3463 /// See IntegerStateBase::operator^=
 /// Applied to both sub-states; returns a copy of the updated state.
3464 DerefState operator^=(const DerefState &R) {
3465 DerefBytesState ^= R.DerefBytesState;
3466 GlobalState ^= R.GlobalState;
3467 return *this;
3468 }
3469
3470 /// See IntegerStateBase::operator+=
 /// Applied to both sub-states; returns a copy of the updated state.
3471 DerefState operator+=(const DerefState &R) {
3472 DerefBytesState += R.DerefBytesState;
3473 GlobalState += R.GlobalState;
3474 return *this;
3475 }
3476
3477 /// See IntegerStateBase::operator&=
 /// Applied to both sub-states; returns a copy of the updated state.
3478 DerefState operator&=(const DerefState &R) {
3479 DerefBytesState &= R.DerefBytesState;
3480 GlobalState &= R.GlobalState;
3481 return *this;
3482 }
3483
3484 /// See IntegerStateBase::operator|=
 /// Applied to both sub-states; returns a copy of the updated state.
3485 DerefState operator|=(const DerefState &R) {
3486 DerefBytesState |= R.DerefBytesState;
3487 GlobalState |= R.GlobalState;
3488 return *this;
3489 }
3490
3491protected:
 /// Optional nonnull attribute; null by default and not assigned anywhere
 /// in this struct.
3492 const AANonNull *NonNullAA = nullptr;
3493};
3494
3495/// An abstract interface for all dereferenceable attributes
/// (Attribute::Dereferenceable), with a DerefState as the wrapped state.
3496struct AADereferenceable
3497 : public IRAttribute<Attribute::Dereferenceable,
3498 StateWrapper<DerefState, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3499 AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3500
3501 /// Return true if we assume that the underlying value is nonnull. Returns
 /// false if NonNullAA is not set.
3502 bool isAssumedNonNull() const {
3503 return NonNullAA && NonNullAA->isAssumedNonNull();
3504 }
3505
3506 /// Return true if we know that the underlying value is nonnull. Returns
 /// false if NonNullAA is not set.
3507 bool isKnownNonNull() const {
3508 return NonNullAA && NonNullAA->isKnownNonNull();
3509 }
3510
3511 /// Return true if we assume that underlying value is
3512 /// dereferenceable(_or_null) globally.
3513 bool isAssumedGlobal() const { return GlobalState.getAssumed(); }
3514
3515 /// Return true if we know that underlying value is
3516 /// dereferenceable(_or_null) globally.
3517 bool isKnownGlobal() const { return GlobalState.getKnown(); }
3518
3519 /// Return assumed dereferenceable bytes (from DerefBytesState).
3520 uint32_t getAssumedDereferenceableBytes() const {
3521 return DerefBytesState.getAssumed();
3522 }
3523
3524 /// Return known dereferenceable bytes (from DerefBytesState).
3525 uint32_t getKnownDereferenceableBytes() const {
3526 return DerefBytesState.getKnown();
3527 }
3528
3529 /// Create an abstract attribute view for the position \p IRP.
3530 static AADereferenceable &createForPosition(const IRPosition &IRP,
3531 Attributor &A);
3532
3533 /// See AbstractAttribute::getName()
3534 const std::string getName() const override { return "AADereferenceable"; }
3535
3536 /// See AbstractAttribute::getIdAddr()
3537 const char *getIdAddr() const override { return &ID; }
3538
3539 /// Return true if \p AA is an AADereferenceable, i.e., its getIdAddr()
3540 /// equals &ID.
3541 static bool classof(const AbstractAttribute *AA) {
3542 return (AA->getIdAddr() == &ID);
3543 }
3544
3545 /// Unique ID; only its address is used for identification (see classof).
3546 static const char ID;
3547};
3548
/// State type used for alignment: an IncIntegerState over uint64_t
/// parameterized with Value::MaximumAlignment and 1.
/// NOTE(review): presumably MaximumAlignment is the best and 1 the worst
/// state; confirm against the IncIntegerState template parameters.
3549using AAAlignmentStateType =
3550 IncIntegerState<uint64_t, Value::MaximumAlignment, 1>;
3551/// An abstract interface for all align attributes (Attribute::Alignment).
3552struct AAAlign : public IRAttribute<
3553 Attribute::Alignment,
3554 StateWrapper<AAAlignmentStateType, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3555 AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3556
3557 /// Return assumed alignment (forwards to getAssumed()).
3558 uint64_t getAssumedAlign() const { return getAssumed(); }
3559
3560 /// Return known alignment (forwards to getKnown()).
3561 uint64_t getKnownAlign() const { return getKnown(); }
3562
3563 /// See AbstractAttribute::getName()
3564 const std::string getName() const override { return "AAAlign"; }
3565
3566 /// See AbstractAttribute::getIdAddr()
3567 const char *getIdAddr() const override { return &ID; }
3568
3569 /// Return true if \p AA is an AAAlign, i.e., its getIdAddr() equals &ID.
3570 static bool classof(const AbstractAttribute *AA) {
3571 return (AA->getIdAddr() == &ID);
3572 }
3573
3574 /// Create an abstract attribute view for the position \p IRP.
3575 static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A);
3576
3577 /// Unique ID; only its address is used for identification (see classof).
3578 static const char ID;
3579};
3580
3581/// An abstract interface for all nocapture attributes (Attribute::NoCapture).
/// The state is a three-bit BitIntegerState; see the enum below.
3582struct AANoCapture
3583 : public IRAttribute<
3584 Attribute::NoCapture,
3585 StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> {
 /// Construct the attribute for position \p IRP. \p A is not used here.
3586 AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3587
3588 /// State encoding bits. A set bit in the state means the property holds.
3589 /// NO_CAPTURE is the best possible state, 0 the worst possible state.
3590 enum {
3591 NOT_CAPTURED_IN_MEM = 1 << 0,
3592 NOT_CAPTURED_IN_INT = 1 << 1,
3593 NOT_CAPTURED_IN_RET = 1 << 2,
3594
3595 /// If we do not capture the value in memory or through integers we can only
3596 /// communicate it back as a derived pointer.
3597 NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
3598
3599 /// If we do not capture the value in memory, through integers, or as a
3600 /// derived pointer we know it is not captured.
3601 NO_CAPTURE =
3602 NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
3603 };
3604
3605 /// Return true if we know that the underlying value is not captured in its
3606 /// respective scope (all NO_CAPTURE bits are known).
3607 bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); }
3608
3609 /// Return true if we assume that the underlying value is not captured in its
3610 /// respective scope (all NO_CAPTURE bits are assumed).
3611 bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); }
3612
3613 /// Return true if we know that the underlying value is not captured in its
3614 /// respective scope but we allow it to escape through a "return".
3615 bool isKnownNoCaptureMaybeReturned() const {
3616 return isKnown(NO_CAPTURE_MAYBE_RETURNED);
3617 }
3618
3619 /// Return true if we assume that the underlying value is not captured in its
3620 /// respective scope but we allow it to escape through a "return".
3621 bool isAssumedNoCaptureMaybeReturned() const {
3622 return isAssumed(NO_CAPTURE_MAYBE_RETURNED);
3623 }
3624
3625 /// Create an abstract attribute view for the position \p IRP.
3626 static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A);
3627
3628 /// See AbstractAttribute::getName()
3629 const std::string getName() const override { return "AANoCapture"; }
3630
3631 /// See AbstractAttribute::getIdAddr()
3632 const char *getIdAddr() const override { return &ID; }
3633
3634 /// Return true if \p AA is an AANoCapture, i.e., its getIdAddr() equals &ID.
3635 static bool classof(const AbstractAttribute *AA) {
3636 return (AA->getIdAddr() == &ID);
3637 }
3638
3639 /// Unique ID; only its address is used for identification (see classof).
3640 static const char ID;
3641};
3642
/// State for value simplification: the type of the original value, a
/// BooleanState tracking validity and fixpoint, and an optional simplified
/// value.
3643struct ValueSimplifyStateType : public AbstractState {
3644
 /// Create a state for a value of type \p Ty.
3645 ValueSimplifyStateType(Type *Ty) : Ty(Ty) {}
3646
 /// Return the best possible representable state for type \p Ty (a freshly
 /// constructed state).
3647 static ValueSimplifyStateType getBestState(Type *Ty) {
3648 return ValueSimplifyStateType(Ty);
3649 }
3650 static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) {
3651 return getBestState(VS.Ty);
3652 }
3653
3654 /// Return the worst possible representable state, i.e., a fresh state on
 /// which a pessimistic fixpoint was indicated.
3655 static ValueSimplifyStateType getWorstState(Type *Ty) {
3656 ValueSimplifyStateType DS(Ty);
3657 DS.indicatePessimisticFixpoint();
3658 return DS;
3659 }
3660 static ValueSimplifyStateType
3661 getWorstState(const ValueSimplifyStateType &VS) {
3662 return getWorstState(VS.Ty);
3663 }
3664
3665 /// See AbstractState::isValidState(...)
3666 bool isValidState() const override { return BS.isValidState(); }
3667
3668 /// See AbstractState::isAtFixpoint(...)
3669 bool isAtFixpoint() const override { return BS.isAtFixpoint(); }
3670
3671 /// Return the assumed state encoding. The non-const overload returns a copy,
 /// the const overload a reference to this state.
3672 ValueSimplifyStateType getAssumed() { return *this; }
3673 const ValueSimplifyStateType &getAssumed() const { return *this; }
3674
3675 /// See AbstractState::indicatePessimisticFixpoint(...)
3676 ChangeStatus indicatePessimisticFixpoint() override {
3677 return BS.indicatePessimisticFixpoint();
3678 }
3679
3680 /// See AbstractState::indicateOptimisticFixpoint(...)
3681 ChangeStatus indicateOptimisticFixpoint() override {
3682 return BS.indicateOptimisticFixpoint();
3683 }
3684
3685 /// "Clamp" this state with \p VS: combine the boolean states and merge the
 /// simplified value of \p VS into this one. Returns a copy of the result.
3686 ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) {
3687 BS ^= VS.BS;
3688 unionAssumed(VS.SimplifiedAssociatedValue);
3689 return *this;
3690 }
3691
 /// Equality: states with different validity are unequal, two invalid states
 /// are equal, and two valid states are compared by their simplified value.
3692 bool operator==(const ValueSimplifyStateType &RHS) const {
3693 if (isValidState() != RHS.isValidState())
3694 return false;
3695 if (!isValidState() && !RHS.isValidState())
3696 return true;
3697 return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue;
3698 }
3699
3700protected:
3701 /// The type of the original value.
3702 Type *Ty;
3703
3704 /// Merge \p Other into the currently assumed simplified value.
3705 bool unionAssumed(Optional<Value *> Other);
3706
3707 /// Helper to track validity and fixpoint
3708 BooleanState BS;
3709
3710 /// An assumed simplified value. Initially, it is set to Optional::None, which
3711 /// means that the value is not clear under current assumption. If in the
3712 /// pessimistic state, getAssumedSimplifiedValue doesn't return this value but
3713 /// returns the original associated value.
3714 Optional<Value *> SimplifiedAssociatedValue;
3715};
3716
3717/// An abstract interface for value simplify abstract attribute.
3718struct AAValueSimplify
3719 : public StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *> {
3720 using Base = StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *>;
 /// Construct the attribute for position \p IRP; the associated type of
 /// \p IRP is forwarded to the wrapped state. \p A is not used here.
3721 AAValueSimplify(const IRPosition &IRP, Attributor &A)
3722 : Base(IRP, IRP.getAssociatedType()) {}
3723
3724 /// Create an abstract attribute view for the position \p IRP.
3725 static AAValueSimplify &createForPosition(const IRPosition &IRP,
3726 Attributor &A);
3727
3728 /// See AbstractAttribute::getName()
3729 const std::string getName() const override { return "AAValueSimplify"; }
3730
3731 /// See AbstractAttribute::getIdAddr()
3732 const char *getIdAddr() const override { return &ID; }
3733
3734 /// Return true if \p AA is an AAValueSimplify, i.e., its getIdAddr() equals
3735 /// &ID.
3736 static bool classof(const AbstractAttribute *AA) {
3737 return (AA->getIdAddr() == &ID);
3738 }
3739
3740 /// Unique ID; only its address is used for identification (see classof).
3741 static const char ID;
3742
3743private:
3744 /// Return an assumed simplified value if a single candidate is found. If
3745 /// there cannot be one, return original value. If it is not clear yet, return
3746 /// the Optional::NoneType.
3747 ///
3748 /// Use `Attributor::getAssumedSimplified` for value simplification.
3749 virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0;
3750
 // The Attributor is a friend so it can call the private query above.
3751 friend struct Attributor;
3752};
3753
/// An abstract interface for heap-to-stack conversion, tracked with a
/// BooleanState. The per-call-site queries are pure virtual.
3754struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
3755 using Base = StateWrapper<BooleanState, AbstractAttribute>;
 /// Construct the attribute for position \p IRP. \p A is not used here.
3756 AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3757
3758 /// Returns true if HeapToStack conversion is assumed to be possible for the
 /// call \p CB.
3759 virtual bool isAssumedHeapToStack(const CallBase &CB) const = 0;
3760
3761 /// Returns true if HeapToStack conversion is assumed and the CB is a
3762 /// callsite to a free operation to be removed.
3763 virtual bool isAssumedHeapToStackRemovedFree(CallBase &CB) const = 0;
3764
3765 /// Create an abstract attribute view for the position \p IRP.
3766 static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
3767
3768 /// See AbstractAttribute::getName()
3769 const std::string getName() const override { return "AAHeapToStack"; }
3770
3771 /// See AbstractAttribute::getIdAddr()
3772 const char *getIdAddr() const override { return &ID; }
3773
3774 /// Return true if \p AA is an AAHeapToStack, i.e., its getIdAddr() equals &ID.
3775 static bool classof(const AbstractAttribute *AA) {
3776 return (AA->getIdAddr() == &ID);
3777 }
3778
3779 /// Unique ID; only its address is used for identification (see classof).
3780 static const char ID;
3781};
3782
3783/// An abstract interface for privatizability.
3784///
3785/// A pointer is privatizable if it can be replaced by a new, private one.
3786/// Privatizing a pointer reduces the use count and the interaction between
3787/// unrelated code parts.
3788///
3789/// In order for a pointer to be privatizable its value cannot be observed
3790/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
3791/// what values are necessary to make the private copy look like the original
3792/// one, and the values we need can be loaded (=dereferenceable).
3793struct AAPrivatizablePtr
3794 : public StateWrapper<BooleanState, AbstractAttribute> {
3795 using Base = StateWrapper<BooleanState, AbstractAttribute>;
 /// Construct the attribute for position \p IRP. \p A is not used here.
3796 AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3797
3798 /// Returns true if pointer privatization is assumed to be possible.
3799 bool isAssumedPrivatizablePtr() const { return getAssumed(); }
3800
3801 /// Returns true if pointer privatization is known to be possible.
3802 bool isKnownPrivatizablePtr() const { return getKnown(); }
3803
3804 /// Return the type we can choose for a private copy of the underlying
3805 /// value. None means it is not clear yet, nullptr means there is none.
3806 virtual Optional<Type *> getPrivatizableType() const = 0;
3807
3808 /// Create an abstract attribute view for the position \p IRP.
3809 static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
3810 Attributor &A);
3811
3812 /// See AbstractAttribute::getName()
3813 const std::string getName() const override { return "AAPrivatizablePtr"; }
3814
3815 /// See AbstractAttribute::getIdAddr()
3816 const char *getIdAddr() const override { return &ID; }
3817
3818 /// Return true if \p AA is an AAPrivatizablePtr, i.e., its getIdAddr()
3819 /// equals &ID.
3820 static bool classof(const AbstractAttribute *AA) {
3821 return (AA->getIdAddr() == &ID);
3822 }
3823
3824 /// Unique ID; only its address is used for identification (see classof).
3825 static const char ID;
3826};
3827
3828/// An abstract interface for memory access kind related attributes
3829/// (readnone/readonly/writeonly).
3830struct AAMemoryBehavior
3831 : public IRAttribute<
3832 Attribute::ReadNone,
3833 StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> {
3834 AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3835
3836 /// State encoding bits. A set bit in the state means the property holds.
3837 /// BEST_STATE is the best possible state, 0 the worst possible state.
3838 enum {
3839 NO_READS = 1 << 0,
3840 NO_WRITES = 1 << 1,
3841 NO_ACCESSES = NO_READS | NO_WRITES,
3842
3843 BEST_STATE = NO_ACCESSES,
3844 };
3845 static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");
3846
3847 /// Return true if we know that the underlying value is neither read nor
3848 /// written in its respective scope (both NO_READS and NO_WRITES are known).
3849 bool isKnownReadNone() const { return isKnown(NO_ACCESSES); }
3850
3851 /// Return true if we assume that the underlying value is neither read nor
3852 /// written in its respective scope (both NO_READS and NO_WRITES are assumed).
3853 bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); }
3854
3855 /// Return true if we know that the underlying value is not written
3856 /// in its respective scope (NO_WRITES is known).
3857 bool isKnownReadOnly() const { return isKnown(NO_WRITES); }
3858
3859 /// Return true if we assume that the underlying value is not written
3860 /// in its respective scope (NO_WRITES is assumed).
3861 bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); }
3862
3863 /// Return true if we know that the underlying value is not read in its
3864 /// respective scope.
3865 bool isKnownWriteOnly() const { return isKnown(NO_READS); }
3866
3867 /// Return true if we assume that the underlying value is not read in its
3868 /// respective scope.
3869 bool isAssumedWriteOnly() const { return isAssumed(NO_READS); }
3870
3871 /// Create an abstract attribute view for the position \p IRP.
3872 static AAMemoryBehavior &createForPosition(const IRPosition &IRP,
3873 Attributor &A);
3874
3875 /// See AbstractAttribute::getName()
3876 const std::string getName() const override { return "AAMemoryBehavior"; }
3877
3878 /// See AbstractAttribute::getIdAddr()
3879 const char *getIdAddr() const override { return &ID; }
3880
3881 /// This function should return true if the type of the \p AA is
3882 /// AAMemoryBehavior, decided by comparing \p AA's ID address with &ID.
3883 static bool classof(const AbstractAttribute *AA) {
3884 return (AA->getIdAddr() == &ID);
3885 }
3886
3887 /// Unique ID (due to the unique address)
3888 static const char ID;
3889};
3890
3891/// An abstract interface for all memory location attributes
3892/// (readnone/argmemonly/inaccessiblememonly/inaccessibleorargmemonly).
3893struct AAMemoryLocation
3894 : public IRAttribute<
3895 Attribute::ReadNone,
3896 StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> {
3897 using MemoryLocationsKind = StateType::base_t;
3898
3899 AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3900
3901 /// Encoding of different locations that could be accessed by a memory
3902 /// access. Each NO_XXX bit states that location kind XXX is not accessed.
3903 enum {
3904 ALL_LOCATIONS = 0,
3905 NO_LOCAL_MEM = 1 << 0,
3906 NO_CONST_MEM = 1 << 1,
3907 NO_GLOBAL_INTERNAL_MEM = 1 << 2,
3908 NO_GLOBAL_EXTERNAL_MEM = 1 << 3,
3909 NO_GLOBAL_MEM = NO_GLOBAL_INTERNAL_MEM | NO_GLOBAL_EXTERNAL_MEM,
3910 NO_ARGUMENT_MEM = 1 << 4,
3911 NO_INACCESSIBLE_MEM = 1 << 5,
3912 NO_MALLOCED_MEM = 1 << 6,
3913 NO_UNKOWN_MEM = 1 << 7,
3914 NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM |
3915 NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM |
3916 NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM,
3917
3918 // Helper bit to track if we gave up or not.
3919 VALID_STATE = NO_LOCATIONS + 1,
3920
3921 BEST_STATE = NO_LOCATIONS | VALID_STATE,
3922 };
3923 static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");
3924
3925 /// Return true if we know that the associated function has no observable
3926 /// accesses.
3927 bool isKnownReadNone() const { return isKnown(NO_LOCATIONS); }
3928
3929 /// Return true if we assume that the associated function has no observable
3930 /// accesses; assumed stack-only accesses also qualify.
3931 bool isAssumedReadNone() const {
3932 return isAssumed(NO_LOCATIONS) || isAssumedStackOnly();
3933 }
3934
3935 /// Return true if we know that the associated function has at most
3936 /// local/stack accesses.
3937 bool isKnowStackOnly() const {
3938 return isKnown(inverseLocation(NO_LOCAL_MEM, true, true));
3939 }
3940
3941 /// Return true if we assume that the associated function has at most
3942 /// local/stack accesses.
3943 bool isAssumedStackOnly() const {
3944 return isAssumed(inverseLocation(NO_LOCAL_MEM, true, true));
3945 }
3946
3947 /// Return true if we know that the underlying value will only access
3948 /// inaccessible memory (see Attribute::InaccessibleMemOnly).
3949 bool isKnownInaccessibleMemOnly() const {
3950 return isKnown(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
3951 }
3952
3953 /// Return true if we assume that the underlying value will only access
3954 /// inaccessible memory (see Attribute::InaccessibleMemOnly).
3955 bool isAssumedInaccessibleMemOnly() const {
3956 return isAssumed(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
3957 }
3958
3959 /// Return true if we know that the underlying value will only access
3960 /// argument pointees (see Attribute::ArgMemOnly).
3961 bool isKnownArgMemOnly() const {
3962 return isKnown(inverseLocation(NO_ARGUMENT_MEM, true, true));
3963 }
3964
3965 /// Return true if we assume that the underlying value will only access
3966 /// argument pointees (see Attribute::ArgMemOnly).
3967 bool isAssumedArgMemOnly() const {
3968 return isAssumed(inverseLocation(NO_ARGUMENT_MEM, true, true));
3969 }
3970
3971 /// Return true if we know that the underlying value will only access
3972 /// inaccessible memory or argument pointees (see
3973 /// Attribute::InaccessibleOrArgMemOnly).
3974 bool isKnownInaccessibleOrArgMemOnly() const {
3975 return isKnown(
3976 inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
3977 }
3978
3979 /// Return true if we assume that the underlying value will only access
3980 /// inaccessible memory or argument pointees (see
3981 /// Attribute::InaccessibleOrArgMemOnly).
3982 bool isAssumedInaccessibleOrArgMemOnly() const {
3983 return isAssumed(
3984 inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
3985 }
3986
3987 /// Return true if the underlying value may access memory through argument
3988 /// pointers of the associated function, if any.
3989 bool mayAccessArgMem() const { return !isAssumed(NO_ARGUMENT_MEM); }
3990
3991 /// Return true if only the memory locations specified by \p MLK are assumed
3992 /// to be accessed by the associated function.
3993 bool isAssumedSpecifiedMemOnly(MemoryLocationsKind MLK) const {
3994 return isAssumed(MLK);
3995 }
3996
3997 /// Return the locations that are assumed to be not accessed by the associated
3998 /// function, if any.
3999 MemoryLocationsKind getAssumedNotAccessedLocation() const {
4000 return getAssumed();
4001 }
4002
4003 /// Return the inverse of location \p Loc, thus for NO_XXX the return
4004 /// describes ONLY_XXX. The flags \p AndLocalMem and \p AndConstMem determine
4005 /// if local (=stack) and constant memory are allowed as well. Most of the
4006 /// time we do want them to be included, e.g., argmemonly allows accesses via
4007 /// argument pointers or local or constant memory accesses.
4008 static MemoryLocationsKind
4009 inverseLocation(MemoryLocationsKind Loc, bool AndLocalMem, bool AndConstMem) {
4010 return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) |
4011 (AndConstMem ? NO_CONST_MEM : 0));
4012 };
4013
4014 /// Return the locations encoded by \p MLK as a readable string.
4015 static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK);
4016
4017 /// Simple enum to distinguish read/write/read-write accesses.
4018 enum AccessKind {
4019 NONE = 0,
4020 READ = 1 << 0,
4021 WRITE = 1 << 1,
4022 READ_WRITE = READ | WRITE,
4023 };
4024
4025 /// Check \p Pred on all accesses to the memory kinds specified by \p MLK.
4026 ///
4027 /// This method will evaluate \p Pred on all accesses (access instruction +
4028 /// underlying accessed memory pointer) and it will return true if \p Pred
4029 /// holds every time.
4030 virtual bool checkForAllAccessesToMemoryKind(
4031 function_ref<bool(const Instruction *, const Value *, AccessKind,
4032 MemoryLocationsKind)>
4033 Pred,
4034 MemoryLocationsKind MLK) const = 0;
4035
4036 /// Create an abstract attribute view for the position \p IRP.
4037 static AAMemoryLocation &createForPosition(const IRPosition &IRP,
4038 Attributor &A);
4039
4040 /// See AbstractState::getAsStr().
4041 const std::string getAsStr() const override {
4042 return getMemoryLocationsAsStr(getAssumedNotAccessedLocation());
4043 }
4044
4045 /// See AbstractAttribute::getName()
4046 const std::string getName() const override { return "AAMemoryLocation"; }
4047
4048 /// See AbstractAttribute::getIdAddr()
4049 const char *getIdAddr() const override { return &ID; }
4050
4051 /// This function should return true if the type of the \p AA is
4052 /// AAMemoryLocation, decided by comparing \p AA's ID address with &ID.
4053 static bool classof(const AbstractAttribute *AA) {
4054 return (AA->getIdAddr() == &ID);
4055 }
4056
4057 /// Unique ID (due to the unique address)
4058 static const char ID;
4059};
4060
4061/// An abstract interface for range value analysis.
4062struct AAValueConstantRange
4063 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
4064 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
4065 AAValueConstantRange(const IRPosition &IRP, Attributor &A)
4066 : Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {}
4067
4068 /// See AbstractAttribute::getState(...).
4069 IntegerRangeState &getState() override { return *this; }
4070 const IntegerRangeState &getState() const override { return *this; }
4071
4072 /// Create an abstract attribute view for the position \p IRP.
4073 static AAValueConstantRange &createForPosition(const IRPosition &IRP,
4074 Attributor &A);
4075
4076 /// Return an assumed range for the associated value at a program point
4077 /// \p CtxI. If \p CtxI is nullptr, simply return an assumed range.
4078 virtual ConstantRange
4079 getAssumedConstantRange(Attributor &A,
4080 const Instruction *CtxI = nullptr) const = 0;
4081
4082 /// Return a known range for the associated value at a program point \p CtxI.
4083 /// If \p CtxI is nullptr, simply return a known range.
4084 virtual ConstantRange
4085 getKnownConstantRange(Attributor &A,
4086 const Instruction *CtxI = nullptr) const = 0;
4087
4088 /// Return an assumed constant for the associated value at program point \p
4089 /// CtxI: the single range element, None for an empty range, else nullptr.
4090 Optional<ConstantInt *>
4091 getAssumedConstantInt(Attributor &A,
4092 const Instruction *CtxI = nullptr) const {
4093 ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
4094 if (auto *C = RangeV.getSingleElement())
4095 return cast<ConstantInt>(
4096 ConstantInt::get(getAssociatedValue().getType(), *C));
4097 if (RangeV.isEmptySet())
4098 return llvm::None;
4099 return nullptr;
4100 }
4101
4102 /// See AbstractAttribute::getName()
4103 const std::string getName() const override { return "AAValueConstantRange"; }
4104
4105 /// See AbstractAttribute::getIdAddr()
4106 const char *getIdAddr() const override { return &ID; }
4107
4108 /// This function should return true if the type of the \p AA is
4109 /// AAValueConstantRange, decided by comparing \p AA's ID address with &ID.
4110 static bool classof(const AbstractAttribute *AA) {
4111 return (AA->getIdAddr() == &ID);
4112 }
4113
4114 /// Unique ID (due to the unique address)
4115 static const char ID;
4116};
4117
4118/// A class for a set state.
4119/// The assumed boolean state indicates whether the corresponding set is full
4120/// set or not. If the assumed state is false, this is the worst state. The
4121/// worst state (invalid state) of set of potential values is when the set
4122/// contains every possible value (i.e. we cannot in any way limit the value
4123/// that the target position can take). That never happens naturally, we only
4124/// force it. As for the conditions under which we force it, see
4125/// AAPotentialValues.
4126template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
4127struct PotentialValuesState : AbstractState {
4128 using SetTy = DenseSet<MemberTy, KeyInfo>;
4129
4130 PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}
4131
4132 PotentialValuesState(bool IsValid)
4133 : IsValidState(IsValid), UndefIsContained(false) {}
4134
4135 /// See AbstractState::isValidState(...)
4136 bool isValidState() const override { return IsValidState.isValidState(); }
4137
4138 /// See AbstractState::isAtFixpoint(...)
4139 bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); }
4140
4141 /// See AbstractState::indicatePessimisticFixpoint(...)
4142 ChangeStatus indicatePessimisticFixpoint() override {
4143 return IsValidState.indicatePessimisticFixpoint();
4144 }
4145
4146 /// See AbstractState::indicateOptimisticFixpoint(...)
4147 ChangeStatus indicateOptimisticFixpoint() override {
4148 return IsValidState.indicateOptimisticFixpoint();
4149 }
4150
4151 /// Return the assumed state
4152 PotentialValuesState &getAssumed() { return *this; }
4153 const PotentialValuesState &getAssumed() const { return *this; }
4154
4155 /// Return this set. We should check whether this set is valid or not by
4156 /// isValidState() before calling this function.
4157 const SetTy &getAssumedSet() const {
4158 assert(isValidState() && "This set shoud not be used when it is invalid!")(static_cast <bool> (isValidState() && "This set shoud not be used when it is invalid!"
) ? void (0) : __assert_fail ("isValidState() && \"This set shoud not be used when it is invalid!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 4158, __extension__ __PRETTY_FUNCTION__))
;
4159 return Set;
4160 }
4161
4162 /// Returns whether this state contains an undef value or not.
4163 bool undefIsContained() const {
4164 assert(isValidState() && "This flag shoud not be used when it is invalid!")(static_cast <bool> (isValidState() && "This flag shoud not be used when it is invalid!"
) ? void (0) : __assert_fail ("isValidState() && \"This flag shoud not be used when it is invalid!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 4164, __extension__ __PRETTY_FUNCTION__))
;
4165 return UndefIsContained;
4166 }
4167
/// Two states are equal if both are invalid, or both are valid with the same
/// undef flag and the same set.
4168 bool operator==(const PotentialValuesState &RHS) const {
4169 if (isValidState() != RHS.isValidState())
4170 return false;
4171 if (!isValidState() && !RHS.isValidState())
4172 return true;
4173 if (undefIsContained() != RHS.undefIsContained())
4174 return false;
4175 return Set == RHS.getAssumedSet();
4176 }
4177
4178 /// Maximum number of potential values to be tracked.
4179 /// This is set by -attributor-max-potential-values command line option
4180 static unsigned MaxPotentialValues;
4181
4182 /// Return empty set as the best state of potential values.
4183 static PotentialValuesState getBestState() {
4184 return PotentialValuesState(true);
4185 }
4186
4187 static PotentialValuesState getBestState(PotentialValuesState &PVS) {
4188 return getBestState();
4189 }
4190
4191 /// Return full set as the worst state of potential values.
4192 static PotentialValuesState getWorstState() {
4193 return PotentialValuesState(false);
4194 }
4195
4196 /// Union assumed set with the passed value.
4197 void unionAssumed(const MemberTy &C) { insert(C); }
4198
4199 /// Union assumed set with assumed set of the passed state \p PVS.
4200 void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); }
4201
4202 /// Union assumed set with an undef value.
4203 void unionAssumedWithUndef() { unionWithUndef(); }
4204
4205 /// "Clamp" this state with \p PVS. Returns *this by reference so that the
/// underlying set is not copied on every clamp.
4206 PotentialValuesState &operator^=(const PotentialValuesState &PVS) {
4207 IsValidState ^= PVS.IsValidState;
4208 unionAssumed(PVS);
4209 return *this;
4210 }
4211
/// Like operator^=, except that the validity states are combined with &=.
/// Returns *this by reference so that the underlying set is not copied.
4212 PotentialValuesState &operator&=(const PotentialValuesState &PVS) {
4213 IsValidState &= PVS.IsValidState;
4214 unionAssumed(PVS);
4215 return *this;
4216 }
4217
4218private:
4219 /// Check the size of this set, and invalidate when the size is no
4220 /// less than \p MaxPotentialValues threshold.
4221 void checkAndInvalidate() {
4222 if (Set.size() >= MaxPotentialValues)
4223 indicatePessimisticFixpoint();
4224 else
4225 reduceUndefValue();
4226 }
4227
4228 /// If this state contains both undef and not undef, we can reduce
4229 /// undef to the not undef value.
4230 void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); }
4231
4232 /// Insert an element into this set.
4233 void insert(const MemberTy &C) {
4234 if (!isValidState())
4235 return;
4236 Set.insert(C);
4237 checkAndInvalidate();
4238 }
4239
4240 /// Take union with R.
4241 void unionWith(const PotentialValuesState &R) {
4242 /// If this is a full set, do nothing.
4243 if (!isValidState())
4244 return;
4245 /// If R is full set, change L to a full set.
4246 if (!R.isValidState()) {
4247 indicatePessimisticFixpoint();
4248 return;
4249 }
4250 for (const MemberTy &C : R.Set)
4251 Set.insert(C);
4252 UndefIsContained |= R.undefIsContained();
4253 checkAndInvalidate();
4254 }
4255
4256 /// Take union with an undef value.
4257 void unionWithUndef() {
4258 UndefIsContained = true;
4259 reduceUndefValue();
4260 }
4261
4262 /// Take intersection with R.
4263 void intersectWith(const PotentialValuesState &R) {
4264 /// If R is a full set, do nothing.
4265 if (!R.isValidState())
4266 return;
4267 /// If this is a full set, change this to R.
4268 if (!isValidState()) {
4269 *this = R;
4270 return;
4271 }
4272 SetTy IntersectSet;
4273 for (const MemberTy &C : Set) {
4274 if (R.Set.count(C))
4275 IntersectSet.insert(C);
4276 }
4277 Set = IntersectSet;
4278 UndefIsContained &= R.undefIsContained();
4279 reduceUndefValue();
4280 }
4281
4282 /// A helper state which indicate whether this state is valid or not.
4283 BooleanState IsValidState;
4284
4285 /// Container for potential values
4286 SetTy Set;
4287
4288 /// Flag for undef value
4289 bool UndefIsContained;
4290};
4291
4292using PotentialConstantIntValuesState = PotentialValuesState<APInt>;
4293
4294raw_ostream &operator<<(raw_ostream &OS,
4295 const PotentialConstantIntValuesState &R);
4296
4297/// An abstract interface for potential values analysis.
4298///
4299/// This AA collects potential values for each IR position.
4300/// An assumed set of potential values is initialized with the empty set (the
4301/// best state) and it will grow monotonically as we find more potential values
4302/// for this position.
4303/// The set might be forced to the worst state, that is, to contain every
4304/// possible value for this position in 2 cases.
4305/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the
4306/// case that this position is affected (e.g. because of an operation) by a
4307/// Value that is in the worst state.
4308/// 2. We tried to initialize on a Value that we cannot handle (e.g. an
4309/// operator we do not currently handle).
4310///
4311/// TODO: Support values other than constant integers.
4312struct AAPotentialValues
4313 : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
4314 using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
4315 AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4316
4317 /// See AbstractAttribute::getState(...).
4318 PotentialConstantIntValuesState &getState() override { return *this; }
4319 const PotentialConstantIntValuesState &getState() const override {
4320 return *this;
4321 }
4322
4323 /// Create an abstract attribute view for the position \p IRP.
4324 static AAPotentialValues &createForPosition(const IRPosition &IRP,
4325 Attributor &A);
4326
4327 /// Return the assumed constant for the associated value, if there is one.
/// An invalid state or a set with more than one member yields nullptr, a
/// single member yields that constant, an empty set yields a zero constant
/// if undef is contained and None otherwise.
4328 Optional<ConstantInt *>
4329 getAssumedConstantInt(Attributor &A,
4330 const Instruction *CtxI = nullptr) const {
4331 if (!isValidState())
4332 return nullptr;
4333 if (getAssumedSet().size() == 1)
4334 return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
4335 *(getAssumedSet().begin())));
4336 if (getAssumedSet().size() == 0) {
4337 if (undefIsContained())
4338 return cast<ConstantInt>(
4339 ConstantInt::get(getAssociatedValue().getType(), 0));
4340 return llvm::None;
4341 }
4342
4343 return nullptr;
4344 }
4345
4346 /// See AbstractAttribute::getName()
4347 const std::string getName() const override { return "AAPotentialValues"; }
4348
4349 /// See AbstractAttribute::getIdAddr()
4350 const char *getIdAddr() const override { return &ID; }
4351
4352 /// This function should return true if the type of the \p AA is
4353 /// AAPotentialValues, decided by comparing \p AA's ID address with &ID.
4354 static bool classof(const AbstractAttribute *AA) {
4355 return (AA->getIdAddr() == &ID);
4356 }
4357
4358 /// Unique ID (due to the unique address)
4359 static const char ID;
4360};
4361
4362/// An abstract interface for all noundef attributes.
4363struct AANoUndef
4364 : public IRAttribute<Attribute::NoUndef,
4365 StateWrapper<BooleanState, AbstractAttribute>> {
4366 AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
4367
4368 /// Return true if we assume that the underlying value is noundef.
4369 bool isAssumedNoUndef() const { return getAssumed(); }
4370
4371 /// Return true if we know that the underlying value is noundef.
4372 bool isKnownNoUndef() const { return getKnown(); }
4373
4374 /// Create an abstract attribute view for the position \p IRP.
4375 static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A);
4376
4377 /// See AbstractAttribute::getName()
4378 const std::string getName() const override { return "AANoUndef"; }
4379
4380 /// See AbstractAttribute::getIdAddr()
4381 const char *getIdAddr() const override { return &ID; }
4382
4383 /// Return true if \p AA is an AANoUndef (its ID address equals &ID).
4384 static bool classof(const AbstractAttribute *AA) {
4385 return (AA->getIdAddr() == &ID);
4386 }
4387
4388 /// Unique ID (due to the unique address)
4389 static const char ID;
4390};
4391
4392struct AACallGraphNode;
4393struct AACallEdges;
4394
4395/// An iterator for call edges that creates AACallEdges attributes lazily.
4396/// This iterator becomes invalid if the underlying edge list changes,
4397/// so it should not outlive an iteration of the Attributor.
4398class AACallEdgeIterator
4399 : public iterator_adaptor_base<AACallEdgeIterator,
4400 SetVector<Function *>::iterator> {
// Private constructor: only the friends declared below can create iterators.
4401 AACallEdgeIterator(Attributor &A, SetVector<Function *>::iterator Begin)
4402 : iterator_adaptor_base(Begin), A(A) {}
4403
4404public:
// Defined out of line; returns the call graph node for the current function.
4405 AACallGraphNode *operator*() const;
4406
4407private:
4408 Attributor &A;
4409 friend AACallEdges;
4410 friend AttributorCallGraph;
4411};
4412
/// Base interface of a call graph node: exposes its optimistic call edges
/// through a pair of AACallEdgeIterators.
4413struct AACallGraphNode {
4414 AACallGraphNode(Attributor &A) : A(A) {}
4415 virtual ~AACallGraphNode() {}
4416
4417 virtual AACallEdgeIterator optimisticEdgesBegin() const = 0;
4418 virtual AACallEdgeIterator optimisticEdgesEnd() const = 0;
4419
4420 /// Iterator range [optimisticEdgesBegin(), optimisticEdgesEnd()) for
/// exploring the call graph.
4421 iterator_range<AACallEdgeIterator> optimisticEdgesRange() const {
4422 return iterator_range<AACallEdgeIterator>(optimisticEdgesBegin(),
4423 optimisticEdgesEnd());
4424 }
4425
4426protected:
4427 /// Reference to Attributor needed for GraphTraits implementation.
4428 Attributor &A;
4429};
4430
4431/// An abstract state for querying live call edges.
4432/// This interface uses the Attributor's optimistic liveness
4433/// information to compute the edges that are alive.
4434struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
4435 AACallGraphNode {
4436 using Base = StateWrapper<BooleanState, AbstractAttribute>;
4437
4438 AACallEdges(const IRPosition &IRP, Attributor &A)
4439 : Base(IRP), AACallGraphNode(A) {}
4440
4441 /// Get the optimistic edges.
4442 virtual const SetVector<Function *> &getOptimisticEdges() const = 0;
4443
4444 /// Is there any call with an unknown callee.
4445 virtual bool hasUnknownCallee() const = 0;
4446
4447 /// Is there any call with an unknown callee, excluding any inline asm.
4448 virtual bool hasNonAsmUnknownCallee() const = 0;
4449
4450 /// Begin iterator over getOptimisticEdges() for exploring the call graph.
4451 AACallEdgeIterator optimisticEdgesBegin() const override {
4452 return AACallEdgeIterator(A, getOptimisticEdges().begin());
4453 }
4454
4455 /// End iterator over getOptimisticEdges() for exploring the call graph.
4456 AACallEdgeIterator optimisticEdgesEnd() const override {
4457 return AACallEdgeIterator(A, getOptimisticEdges().end());
4458 }
4459
4460 /// Create an abstract attribute view for the position \p IRP.
4461 static AACallEdges &createForPosition(const IRPosition &IRP, Attributor &A);
4462
4463 /// See AbstractAttribute::getName()
4464 const std::string getName() const override { return "AACallEdges"; }
4465
4466 /// See AbstractAttribute::getIdAddr()
4467 const char *getIdAddr() const override { return &ID; }
4468
4469 /// Return true if \p AA is an AACallEdges (its ID address equals &ID).
4470 static bool classof(const AbstractAttribute *AA) {
4471 return (AA->getIdAddr() == &ID);
4472 }
4473
4474 /// Unique ID (due to the unique address)
4475 static const char ID;
4476};
4477
4478// Synthetic root node for the Attributor's internal call graph. Its edges
// range over the Attributor's function list (A.Functions).
4479struct AttributorCallGraph : public AACallGraphNode {
4480 AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {}
4481 virtual ~AttributorCallGraph() {}
4482
4483 AACallEdgeIterator optimisticEdgesBegin() const override {
4484 return AACallEdgeIterator(A, A.Functions.begin());
4485 }
4486
4487 AACallEdgeIterator optimisticEdgesEnd() const override {
4488 return AACallEdgeIterator(A, A.Functions.end());
4489 }
4490
4491 /// Force populate the entire call graph.
4492 void populateAll() const {
4493 for (const AACallGraphNode *AA : optimisticEdgesRange()) {
4494 // Nothing else to do here; presumably dereferencing the iterator is what
// creates the nodes (see AACallEdgeIterator) -- TODO confirm.
4495 (void)AA;
4496 }
4497 }
4498
// Defined out of line.
4499 void print();
4500};
4501
/// GraphTraits specialization for AACallGraphNode pointers: the children of a
/// node are its optimistic call edges.
4502template <> struct GraphTraits<AACallGraphNode *> {
4503 using NodeRef = AACallGraphNode *;
4504 using ChildIteratorType = AACallEdgeIterator;
4505
// First child: start of the node's optimistic edges.
4506 static AACallEdgeIterator child_begin(AACallGraphNode *Node) {
4507 return Node->optimisticEdgesBegin();
4508 }
4509
// Past-the-end child: end of the node's optimistic edges.
4510 static AACallEdgeIterator child_end(AACallGraphNode *Node) {
4511 return Node->optimisticEdgesEnd();
4512 }
4513};
4514
/// GraphTraits specialization for the whole AttributorCallGraph. The graph
/// object itself is the entry node and its optimistic edges are the node list.
4515template <>
4516struct GraphTraits<AttributorCallGraph *>
4517 : public GraphTraits<AACallGraphNode *> {
4518 using nodes_iterator = AACallEdgeIterator;
4519
// The graph is its own (synthetic) entry node.
4520 static AACallGraphNode *getEntryNode(AttributorCallGraph *G) {
4521 return static_cast<AACallGraphNode *>(G);
4522 }
4523
4524 static AACallEdgeIterator nodes_begin(const AttributorCallGraph *G) {
4525 return G->optimisticEdgesBegin();
4526 }
4527
4528 static AACallEdgeIterator nodes_end(const AttributorCallGraph *G) {
4529 return G->optimisticEdgesEnd();
4530 }
4531};
4532
/// DOT printing traits for the AttributorCallGraph: nodes are labeled with the
/// name of their associated function and the synthetic root is hidden.
4533template <>
4534struct DOTGraphTraits<AttributorCallGraph *> : public DefaultDOTGraphTraits {
4535 DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}
4536
// NOTE(review): the unchecked static_cast assumes every labeled node is an
// AACallEdges (the root is hidden by isNodeHidden) -- confirm against callers.
4537 std::string getNodeLabel(const AACallGraphNode *Node,
4538 const AttributorCallGraph *Graph) {
4539 const AACallEdges *AACE = static_cast<const AACallEdges *>(Node);
4540 return AACE->getAssociatedFunction()->getName().str();
4541 }
4542
4543 static bool isNodeHidden(const AACallGraphNode *Node,
4544 const AttributorCallGraph *Graph) {
4545 // Hide the synth root, i.e. the node that is the graph object itself.
4546 return static_cast<const AACallGraphNode *>(Graph) == Node;
4547 }
4548};
4549
/// An abstract interface to query whether an instruction or a basic block is
/// executed only by the initial thread.
4550struct AAExecutionDomain
4551 : public StateWrapper<BooleanState, AbstractAttribute> {
4552 using Base = StateWrapper<BooleanState, AbstractAttribute>;
4553 AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4554
4555 /// Create an abstract attribute view for the position \p IRP.
4556 static AAExecutionDomain &createForPosition(const IRPosition &IRP,
4557 Attributor &A);
4558
4559 /// See AbstractAttribute::getName().
4560 const std::string getName() const override { return "AAExecutionDomain"; }
4561
4562 /// See AbstractAttribute::getIdAddr().
4563 const char *getIdAddr() const override { return &ID; }
4564
4565 /// Check if an instruction is executed only by the initial thread.
4566 virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0;
4567
4568 /// Check if a basic block is executed only by the initial thread.
4569 virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0;
4570
4571 /// This function should return true if the type of the \p AA is
4572 /// AAExecutionDomain, decided by comparing \p AA's ID address with &ID.
4573 static bool classof(const AbstractAttribute *AA) {
4574 return (AA->getIdAddr() == &ID);
4575 }
4576
4577 /// Unique ID (due to the unique address)
4578 static const char ID;
4579};
4580
4581/// An abstract Attribute for computing reachability between functions.
4582struct AAFunctionReachability
4583 : public StateWrapper<BooleanState, AbstractAttribute> {
4584 using Base = StateWrapper<BooleanState, AbstractAttribute>;
4585
4586 AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4587
4588 /// Return true if the function represented by this position can reach \p Fn.
4589 virtual bool canReach(Attributor &A, Function *Fn) const = 0;
4590
4591 /// Return true if the call site \p CB can reach \p Fn.
4592 virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0;
4593
4594 /// Create an abstract attribute view for the position \p IRP.
4595 static AAFunctionReachability &createForPosition(const IRPosition &IRP,
4596 Attributor &A);
4597
4598 /// See AbstractAttribute::getName()
4599 const std::string getName() const override { return "AAFunctionReachability"; }
4600
4601 /// See AbstractAttribute::getIdAddr()
4602 const char *getIdAddr() const override { return &ID; }
4603
4604 /// Return true if \p AA is an AAFunctionReachability (ID address equals &ID).
4605 static bool classof(const AbstractAttribute *AA) {
4606 return (AA->getIdAddr() == &ID);
4607 }
4608
4609 /// Unique ID (due to the unique address)
4610 static const char ID;
4611
4612private:
4613 /// Can this function reach a call with unknown callee.
4614 virtual bool canReachUnknownCallee() const = 0;
4615};
4616
4617/// An abstract interface for pointer information: it describes accesses (see Access) and lets clients visit those that may interfere with a given load or store.
4618struct AAPointerInfo : public AbstractAttribute {
4619 AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {}
4620
4621 enum AccessKind { // Bit flags: tested by Access::isRead()/isWrite() and OR-ed together by Access::operator&=.
4622 AK_READ = 1 << 0,
4623 AK_WRITE = 1 << 1,
4624 AK_READ_WRITE = AK_READ | AK_WRITE,
4625 };
4626
4627 /// An access description.
4628 struct Access {
4629 Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty) // Local and remote instruction are both \p I.
4630 : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {}
4631 Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content,
4632 AccessKind Kind, Type *Ty)
4633 : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
4634 Ty(Ty) {}
4635 Access(const Access &Other)
4636 : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
4637 Kind(Other.Kind), Ty(Other.Ty) {}
4638 Access(const Access &&Other) // NOTE(review): takes a const rvalue reference; members are copied exactly as in the copy constructor above.
4639 : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
4640 Kind(Other.Kind), Ty(Other.Ty) {}
4641
4642 Access &operator=(const Access &Other) {
4643 LocalI = Other.LocalI;
4644 RemoteI = Other.RemoteI;
4645 Content = Other.Content;
4646 Kind = Other.Kind;
4647 Ty = Other.Ty;
4648 return *this;
4649 }
4650 bool operator==(const Access &R) const { // NOTE(review): Ty is not part of the comparison.
4651 return LocalI == R.LocalI && RemoteI == R.RemoteI &&
4652 Content == R.Content && Kind == R.Kind;
4653 }
4654 bool operator!=(const Access &R) const { return !(*this == R); }
4655
4656 Access &operator&=(const Access &R) { // Merge \p R into this access: combine the contents and OR the access kinds. Both must have the same remote instruction.
4657 assert(RemoteI == R.RemoteI && "Expected same instruction!")(static_cast <bool> (RemoteI == R.RemoteI && "Expected same instruction!"
) ? void (0) : __assert_fail ("RemoteI == R.RemoteI && \"Expected same instruction!\""
, "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include/llvm/Transforms/IPO/Attributor.h"
, 4657, __extension__ __PRETTY_FUNCTION__))
;
4658 Content =
4659 AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
4660 Kind = AccessKind(Kind | R.Kind);
4661 return *this;
4662 }
4663
4664 /// Return the access kind.
4665 AccessKind getKind() const { return Kind; }
4666
4667 /// Return true if this is a read access.
4668 bool isRead() const { return Kind & AK_READ; }
4669
4670 /// Return true if this is a write access.
4671 bool isWrite() const { return Kind & AK_WRITE; }
4672
4673 /// Return the instruction that causes the access with respect to the local
4674 /// scope of the associated attribute.
4675 Instruction *getLocalInst() const { return LocalI; }
4676
4677 /// Return the actual instruction that causes the access.
4678 Instruction *getRemoteInst() const { return RemoteI; }
4679
4680 /// Return true if the value written is not known yet.
4681 bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); }
4682
4683 /// Return true if the value written cannot be determined at all.
4684 bool isWrittenValueUnknown() const {
4685 return Content.hasValue() && !*Content;
4686 }
4687
4688 /// Return the type associated with the access, if known.
4689 Type *getType() const { return Ty; }
4690
4691 /// Return the value written, if any. As long as
4692 /// isWrittenValueYetUndetermined return true this function shall not be
4693 /// called.
4694 Value *getWrittenValue() const { return *Content; }
4695
4696 /// Return the written value which can be `llvm::None` if it is not yet
4697 /// determined.
4698 Optional<Value *> getContent() const { return Content; }
4699
4700 private:
4701 /// The instruction responsible for the access with respect to the local
4702 /// scope of the associated attribute.
4703 Instruction *LocalI;
4704
4705 /// The instruction responsible for the access.
4706 Instruction *RemoteI;
4707
4708 /// The value written, if any. `llvm::None` means "not known yet", `nullptr`
4709 /// means the value cannot be determined.
4710 Optional<Value *> Content;
4711
4712 /// The access kind, e.g., READ, as bitset (could be more than one).
4713 AccessKind Kind;
4714
4715 /// The type of the content, thus the type read/written, can be null if not
4716 /// available.
4717 Type *Ty;
4718 };
4719
4720 /// Create an abstract attribute view for the position \p IRP.
4721 static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);
4722
4723 /// See AbstractAttribute::getName()
4724 const std::string getName() const override { return "AAPointerInfo"; }
4725
4726 /// See AbstractAttribute::getIdAddr()
4727 const char *getIdAddr() const override { return &ID; }
4728
4729 /// Call \p CB on all accesses that might interfere with \p LI (or \p SI) and return true
4730 /// if all such accesses were known and the callback returned true for all of
4731 /// them, false otherwise.
4732 virtual bool forallInterferingAccesses(
4733 LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0;
4734 virtual bool forallInterferingAccesses(
4735 StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
4736
4737 /// This function should return true if the type of the \p AA is AAPointerInfo.
4738 static bool classof(const AbstractAttribute *AA) {
4739 return (AA->getIdAddr() == &ID);
4740 }
4741
4742 /// Unique ID (due to the unique address)
4743 static const char ID;
4744};
4745
4746/// An abstract attribute for getting assumption information.
4747struct AAAssumptionInfo
4748 : public StateWrapper<SetState<StringRef>, AbstractAttribute,
4749 DenseSet<StringRef>> {
4750 using Base =
4751 StateWrapper<SetState<StringRef>, AbstractAttribute, DenseSet<StringRef>>;
4752
4753 AAAssumptionInfo(const IRPosition &IRP, Attributor &A,
4754 const DenseSet<StringRef> &Known)
4755 : Base(IRP, Known) {} // Forwards \p IRP and \p Known to the state wrapper base; \p A is not used here.
4756
4757 /// Returns true if the assumption set contains the assumption \p Assumption.
4758 virtual bool hasAssumption(const StringRef Assumption) const = 0;
4759
4760 /// Create an abstract attribute view for the position \p IRP.
4761 static AAAssumptionInfo &createForPosition(const IRPosition &IRP,
4762 Attributor &A);
4763
4764 /// See AbstractAttribute::getName()
4765 const std::string getName() const override { return "AAAssumptionInfo"; }
4766
4767 /// See AbstractAttribute::getIdAddr()
4768 const char *getIdAddr() const override { return &ID; }
4769
4770 /// This function should return true if the type of the \p AA is
4771 /// AAAssumptionInfo.
4772 static bool classof(const AbstractAttribute *AA) {
4773 return (AA->getIdAddr() == &ID);
4774 }
4775
4776 /// Unique ID (due to the unique address)
4777 static const char ID;
4778};
4779
4780raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
4781
4782/// Run options, used by the pass manager. The values are bit flags so they can be combined.
4783enum AttributorRunOption {
4784 NONE = 0, // No bit set.
4785 MODULE = 1 << 0, // Bit 0.
4786 CGSCC = 1 << 1, // Bit 1.
4787 ALL = MODULE | CGSCC // Both bits set.
4788};
4789
4790} // end namespace llvm
4791
4792#endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H