LLVM 17.0.0git
AMDGPUPropagateAttributes.cpp
Go to the documentation of this file.
1//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// \brief This pass propagates attributes from kernels to the non-entry
11/// functions. Most of the library functions were not compiled for specific ABI,
12/// yet will be correctly compiled if proper attributes are propagated from the
13/// caller.
14///
15/// The pass analyzes call graph and propagates ABI target features through the
16/// call graph.
17///
18/// It can run in two modes: as a function or module pass. A function pass
19/// simply propagates attributes. A module pass clones functions if there are
20/// callers with different ABI. If a function is cloned all call sites will
21/// be updated to use a correct clone.
22///
23/// A function pass is limited in functionality but can run early in the
24/// pipeline. A module pass is more powerful but has to run late, so misses
25/// library folding opportunities.
26//
27//===----------------------------------------------------------------------===//
28
29#include "AMDGPU.h"
32#include "llvm/ADT/SmallSet.h"
35#include "llvm/IR/InstrTypes.h"
38
39#define DEBUG_TYPE "amdgpu-propagate-attributes"
40
41using namespace llvm;
42
43namespace llvm {
44extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
45}
46
47namespace {
48
49// Target features to propagate.
50static constexpr const FeatureBitset TargetFeatures = {
51 AMDGPU::FeatureWavefrontSize16,
52 AMDGPU::FeatureWavefrontSize32,
53 AMDGPU::FeatureWavefrontSize64
54};
55
56// Attributes to propagate.
57// TODO: Support conservative min/max merging instead of cloning.
58static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
59
60static constexpr unsigned NumAttr = std::size(AttributeNames);
61
62class AMDGPUPropagateAttributes {
63
64 class FnProperties {
65 private:
66 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
67
68 public:
69 explicit FnProperties(const TargetMachine &TM, const Function &F) {
70 Features = TM.getSubtargetImpl(F)->getFeatureBits();
71
72 for (unsigned I = 0; I < NumAttr; ++I)
73 if (F.hasFnAttribute(AttributeNames[I]))
74 Attributes[I] = F.getFnAttribute(AttributeNames[I]);
75 }
76
77 bool operator == (const FnProperties &Other) const {
78 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
79 return false;
80 for (unsigned I = 0; I < NumAttr; ++I)
81 if (Attributes[I] != Other.Attributes[I])
82 return false;
83 return true;
84 }
85
86 FnProperties adjustToCaller(const FnProperties &CallerProps) const {
87 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
88 for (unsigned I = 0; I < NumAttr; ++I)
89 New.Attributes[I] = CallerProps.Attributes[I];
90 return New;
91 }
92
93 FeatureBitset Features;
94 std::optional<Attribute> Attributes[NumAttr];
95 };
96
97 class Clone {
98 public:
99 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
100 Properties(Props), OrigF(OrigF), NewF(NewF) {}
101
102 FnProperties Properties;
103 Function *OrigF;
104 Function *NewF;
105 };
106
107 const TargetMachine *TM;
108
109 // Clone functions as needed or just set attributes.
110 bool AllowClone;
111
112 // Option propagation roots.
114
115 // Clones of functions with their attributes.
117
118 // Find a clone with required features.
119 Function *findFunction(const FnProperties &PropsNeeded,
120 Function *OrigF);
121
122 // Clone function \p F and set \p NewProps on the clone.
123 // Cole takes the name of original function.
124 Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
125
126 // Set new function's features in place.
127 void setFeatures(Function &F, const FeatureBitset &NewFeatures);
128
129 // Set new function's attributes in place.
130 void setAttributes(Function &F,
131 const ArrayRef<std::optional<Attribute>> NewAttrs);
132
133 std::string getFeatureString(const FeatureBitset &Features) const;
134
135 // Propagate attributes from Roots.
136 bool process();
137
138public:
139 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
140 TM(TM), AllowClone(AllowClone) {}
141
142 // Use F as a root and propagate its attributes.
143 bool process(Function &F);
144
145 // Propagate attributes starting from kernel functions.
146 bool process(Module &M);
147};
148
149// Allows to propagate attributes early, but no cloning is allowed as it must
150// be a function pass to run before any optimizations.
151// TODO: We shall only need a one instance of module pass, but that needs to be
152// in the linker pipeline which is currently not possible.
153class AMDGPUPropagateAttributesEarly : public FunctionPass {
154 const TargetMachine *TM;
155
156public:
157 static char ID; // Pass identification
158
159 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
160 FunctionPass(ID), TM(TM) {
163 }
164
165 bool runOnFunction(Function &F) override;
166};
167
168// Allows to propagate attributes with cloning but does that late in the
169// pipeline.
170class AMDGPUPropagateAttributesLate : public ModulePass {
171 const TargetMachine *TM;
172
173public:
174 static char ID; // Pass identification
175
176 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
177 ModulePass(ID), TM(TM) {
180 }
181
182 bool runOnModule(Module &M) override;
183};
184
185} // end anonymous namespace.
186
187char AMDGPUPropagateAttributesEarly::ID = 0;
188char AMDGPUPropagateAttributesLate::ID = 0;
189
190INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
191 "amdgpu-propagate-attributes-early",
192 "Early propagate attributes from kernels to functions",
193 false, false)
194INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
195 "amdgpu-propagate-attributes-late",
196 "Late propagate attributes from kernels to functions",
198
199Function *
200AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
201 Function *OrigF) {
202 // TODO: search for clone's clones.
203 for (Clone &C : Clones)
204 if (C.OrigF == OrigF && PropsNeeded == C.Properties)
205 return C.NewF;
206
207 return nullptr;
208}
209
210bool AMDGPUPropagateAttributes::process(Module &M) {
211 for (auto &F : M.functions())
212 if (AMDGPU::isKernel(F.getCallingConv()))
213 Roots.insert(&F);
214
215 return Roots.empty() ? false : process();
216}
217
218bool AMDGPUPropagateAttributes::process(Function &F) {
219 Roots.insert(&F);
220 return process();
221}
222
223bool AMDGPUPropagateAttributes::process() {
224 bool Changed = false;
227
228 assert(!Roots.empty());
229 Module &M = *(*Roots.begin())->getParent();
230
231 do {
232 Roots.insert(NewRoots.begin(), NewRoots.end());
233 NewRoots.clear();
234
235 for (auto &F : M.functions()) {
236 if (F.isDeclaration())
237 continue;
238
239 const FnProperties CalleeProps(*TM, F);
242
243 for (User *U : F.users()) {
244 Instruction *I = dyn_cast<Instruction>(U);
245 if (!I)
246 continue;
247 CallBase *CI = dyn_cast<CallBase>(I);
248 // Only propagate attributes if F is the called function. Specifically,
249 // do not propagate attributes if F is passed as an argument.
250 // FIXME: handle bitcasted callee, e.g.
251 // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
252 if (!CI || CI->getCalledOperand() != &F)
253 continue;
254 Function *Caller = CI->getCaller();
255 if (!Caller || !Visited.insert(CI).second)
256 continue;
257 if (!Roots.count(Caller) && !NewRoots.count(Caller))
258 continue;
259
260 const FnProperties CallerProps(*TM, *Caller);
261
262 if (CalleeProps == CallerProps) {
263 if (!Roots.count(&F))
264 NewRoots.insert(&F);
265 continue;
266 }
267
268 Function *NewF = findFunction(CallerProps, &F);
269 if (!NewF) {
270 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
271 if (!AllowClone) {
272 // This may set different features on different iterations if
273 // there is a contradiction in callers' attributes. In this case
274 // we rely on a second pass running on Module, which is allowed
275 // to clone.
276 setFeatures(F, NewProps.Features);
277 setAttributes(F, NewProps.Attributes);
278 NewRoots.insert(&F);
279 Changed = true;
280 break;
281 }
282
283 NewF = cloneWithProperties(F, NewProps);
284 Clones.push_back(Clone(CallerProps, &F, NewF));
285 NewRoots.insert(NewF);
286 }
287
288 ToReplace.push_back(std::pair(CI, NewF));
289 Replaced.insert(&F);
290
291 Changed = true;
292 }
293
294 while (!ToReplace.empty()) {
295 auto R = ToReplace.pop_back_val();
296 R.first->setCalledFunction(R.second);
297 }
298 }
299 } while (!NewRoots.empty());
300
301 for (Function *F : Replaced) {
302 if (F->use_empty())
303 F->eraseFromParent();
304 }
305
306 Roots.clear();
307 Clones.clear();
308
309 return Changed;
310}
311
312Function *
313AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314 const FnProperties &NewProps) {
315 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
316
317 ValueToValueMapTy dummy;
318 Function *NewF = CloneFunction(&F, dummy);
319 setFeatures(*NewF, NewProps.Features);
320 setAttributes(*NewF, NewProps.Attributes);
323
324 // Swap names. If that is the only clone it will retain the name of now
325 // dead value. Preserve original name for externally visible functions.
326 if (F.hasName() && F.hasLocalLinkage()) {
327 std::string NewName = std::string(NewF->getName());
328 NewF->takeName(&F);
329 F.setName(NewName);
330 }
331
332 return NewF;
333}
334
335void AMDGPUPropagateAttributes::setFeatures(Function &F,
336 const FeatureBitset &NewFeatures) {
337 std::string NewFeatureStr = getFeatureString(NewFeatures);
338
339 LLVM_DEBUG(dbgs() << "Set features "
340 << getFeatureString(NewFeatures & TargetFeatures)
341 << " on " << F.getName() << '\n');
342
343 F.removeFnAttr("target-features");
344 F.addFnAttr("target-features", NewFeatureStr);
345}
346
347void AMDGPUPropagateAttributes::setAttributes(
348 Function &F, const ArrayRef<std::optional<Attribute>> NewAttrs) {
349 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350 for (unsigned I = 0; I < NumAttr; ++I) {
351 F.removeFnAttr(AttributeNames[I]);
352 if (NewAttrs[I]) {
353 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354 F.addFnAttr(*NewAttrs[I]);
355 }
356 }
357}
358
359std::string
360AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
361{
362 std::string Ret;
363 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
364 if (Features[KV.Value])
365 Ret += (StringRef("+") + KV.Key + ",").str();
366 else if (TargetFeatures[KV.Value])
367 Ret += (StringRef("-") + KV.Key + ",").str();
368 }
369 Ret.pop_back(); // Remove last comma.
370 return Ret;
371}
372
373bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
374 if (!TM) {
375 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
376 if (!TPC)
377 return false;
378
379 TM = &TPC->getTM<TargetMachine>();
380 }
381
382 if (!AMDGPU::isKernel(F.getCallingConv()))
383 return false;
384
385 return AMDGPUPropagateAttributes(TM, false).process(F);
386}
387
388bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
389 if (!TM) {
390 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
391 if (!TPC)
392 return false;
393
394 TM = &TPC->getTM<TargetMachine>();
395 }
396
397 return AMDGPUPropagateAttributes(TM, true).process(M);
398}
399
402 return new AMDGPUPropagateAttributesEarly(TM);
403}
404
407 return new AMDGPUPropagateAttributesLate(TM);
408}
409
413 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
414 return PreservedAnalyses::all();
415
416 return AMDGPUPropagateAttributes(&TM, false).process(F)
419}
420
423 return AMDGPUPropagateAttributes(&TM, true).process(M)
426}
aarch64 promote const
AMDGPU Kernel Attributes
Provides AMDGPU specific target descriptions.
amdgpu propagate attributes Late propagate attributes from kernels to false
amdgpu propagate attributes late
amdgpu propagate attributes Late propagate attributes from kernels to functions
static const Function * getParent(const Value *V)
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1269
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
Target-Independent Code Generator Pass Configuration Options pass.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1190
Value * getCalledOperand() const
Definition: InstrTypes.h:1405
Function * getCaller()
Helper to get the caller (the parent function).
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Definition: Function.h:696
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:532
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:250
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
const_iterator begin() const
Definition: SmallSet.h:223
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
void clear()
Definition: SmallSet.h:218
const_iterator end() const
Definition: SmallSet.h:229
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
TargetPassConfig.
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Used to provide key value pairs for feature and CPU bit flags.