LLVM  12.0.0git
AMDGPUPropagateAttributes.cpp
Go to the documentation of this file.
1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Module.h"
39 #include <string>
40 
41 #define DEBUG_TYPE "amdgpu-propagate-attributes"
42 
43 using namespace llvm;
44 
45 namespace llvm {
// Feature key/value table defined outside this file. getFeatureString()
// iterates over it to turn feature bits into "+name"/"-name" entries.
46 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
47 }
48 
49 namespace {
50 
// Only the bits listed here take part in FnProperties comparison, and
// getFeatureString() emits an explicit "-name" entry for each of them that is
// not set.
51 // Target features to propagate.
52 static constexpr const FeatureBitset TargetFeatures = {
53  AMDGPU::FeatureWavefrontSize16,
54  AMDGPU::FeatureWavefrontSize32,
55  AMDGPU::FeatureWavefrontSize64
56 };
57 
// Names of the string function attributes copied from caller to callee.
58 // Attributes to propagate.
59 static constexpr const char* AttributeNames[] = {
60  "amdgpu-waves-per-eu"
61 };
62 
// Number of entries in AttributeNames; sizes FnProperties::Attributes.
63 static constexpr unsigned NumAttr =
64  sizeof(AttributeNames) / sizeof(AttributeNames[0]);
65 
66 class AMDGPUPropagateAttributes {
67 
68  class FnProperties {
69  private:
70  explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
71 
72  public:
73  explicit FnProperties(const TargetMachine &TM, const Function &F) {
74  Features = TM.getSubtargetImpl(F)->getFeatureBits();
75 
76  for (unsigned I = 0; I < NumAttr; ++I)
77  if (F.hasFnAttribute(AttributeNames[I]))
78  Attributes[I] = F.getFnAttribute(AttributeNames[I]);
79  }
80 
81  bool operator == (const FnProperties &Other) const {
82  if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
83  return false;
84  for (unsigned I = 0; I < NumAttr; ++I)
85  if (Attributes[I] != Other.Attributes[I])
86  return false;
87  return true;
88  }
89 
90  FnProperties adjustToCaller(const FnProperties &CallerProps) const {
91  FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
92  for (unsigned I = 0; I < NumAttr; ++I)
93  New.Attributes[I] = CallerProps.Attributes[I];
94  return New;
95  }
96 
97  FeatureBitset Features;
98  Optional<Attribute> Attributes[NumAttr];
99  };
100 
101  class Clone {
102  public:
103  Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
104  Properties(Props), OrigF(OrigF), NewF(NewF) {}
105 
106  FnProperties Properties;
107  Function *OrigF;
108  Function *NewF;
109  };
110 
111  const TargetMachine *TM;
112 
113  // Clone functions as needed or just set attributes.
114  bool AllowClone;
115 
116  // Option propagation roots.
118 
119  // Clones of functions with their attributes.
120  SmallVector<Clone, 32> Clones;
121 
122  // Find a clone with required features.
123  Function *findFunction(const FnProperties &PropsNeeded,
124  Function *OrigF);
125 
126  // Clone function \p F and set \p NewProps on the clone.
127  // Cole takes the name of original function.
128  Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
129 
130  // Set new function's features in place.
131  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
132 
133  // Set new function's attributes in place.
134  void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
135 
136  std::string getFeatureString(const FeatureBitset &Features) const;
137 
138  // Propagate attributes from Roots.
139  bool process();
140 
141 public:
142  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
143  TM(TM), AllowClone(AllowClone) {}
144 
145  // Use F as a root and propagate its attributes.
146  bool process(Function &F);
147 
148  // Propagate attributes starting from kernel functions.
149  bool process(Module &M);
150 };
151 
152 // Allows to propagate attributes early, but no clonning is allowed as it must
153 // be a function pass to run before any optimizations.
154 // TODO: We shall only need a one instance of module pass, but that needs to be
155 // in the linker pipeline which is currently not possible.
156 class AMDGPUPropagateAttributesEarly : public FunctionPass {
157  const TargetMachine *TM;
158 
159 public:
160  static char ID; // Pass identification
161 
162  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
163  FunctionPass(ID), TM(TM) {
166  }
167 
168  bool runOnFunction(Function &F) override;
169 };
170 
171 // Allows to propagate attributes with clonning but does that late in the
172 // pipeline.
173 class AMDGPUPropagateAttributesLate : public ModulePass {
174  const TargetMachine *TM;
175 
176 public:
177  static char ID; // Pass identification
178 
179  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
180  ModulePass(ID), TM(TM) {
183  }
184 
185  bool runOnModule(Module &M) override;
186 };
187 
188 } // end anonymous namespace.
189 
192 
193 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
194  "amdgpu-propagate-attributes-early",
195  "Early propagate attributes from kernels to functions",
196  false, false)
197 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
198  "amdgpu-propagate-attributes-late",
199  "Late propagate attributes from kernels to functions",
200  false, false)
201 
202 Function *
203 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
204  Function *OrigF) {
205  // TODO: search for clone's clones.
206  for (Clone &C : Clones)
207  if (C.OrigF == OrigF && PropsNeeded == C.Properties)
208  return C.NewF;
209 
210  return nullptr;
211 }
212 
213 bool AMDGPUPropagateAttributes::process(Module &M) {
214  for (auto &F : M.functions())
215  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
216  Roots.insert(&F);
217 
218  return process();
219 }
220 
221 bool AMDGPUPropagateAttributes::process(Function &F) {
222  Roots.insert(&F);
223  return process();
224 }
225 
226 bool AMDGPUPropagateAttributes::process() {
227  bool Changed = false;
228  SmallSet<Function *, 32> NewRoots;
229  SmallSet<Function *, 32> Replaced;
230 
231  if (Roots.empty())
232  return false;
233  Module &M = *(*Roots.begin())->getParent();
234 
235  do {
236  Roots.insert(NewRoots.begin(), NewRoots.end());
237  NewRoots.clear();
238 
239  for (auto &F : M.functions()) {
240  if (F.isDeclaration())
241  continue;
242 
243  const FnProperties CalleeProps(*TM, F);
245  SmallSet<CallBase *, 32> Visited;
246 
247  for (User *U : F.users()) {
249  if (!I)
250  continue;
251  CallBase *CI = dyn_cast<CallBase>(I);
252  if (!CI)
253  continue;
254  Function *Caller = CI->getCaller();
255  if (!Caller || !Visited.insert(CI).second)
256  continue;
257  if (!Roots.count(Caller) && !NewRoots.count(Caller))
258  continue;
259 
260  const FnProperties CallerProps(*TM, *Caller);
261 
262  if (CalleeProps == CallerProps) {
263  if (!Roots.count(&F))
264  NewRoots.insert(&F);
265  continue;
266  }
267 
268  Function *NewF = findFunction(CallerProps, &F);
269  if (!NewF) {
270  const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
271  if (!AllowClone) {
272  // This may set different features on different iteartions if
273  // there is a contradiction in callers' attributes. In this case
274  // we rely on a second pass running on Module, which is allowed
275  // to clone.
276  setFeatures(F, NewProps.Features);
277  setAttributes(F, NewProps.Attributes);
278  NewRoots.insert(&F);
279  Changed = true;
280  break;
281  }
282 
283  NewF = cloneWithProperties(F, NewProps);
284  Clones.push_back(Clone(CallerProps, &F, NewF));
285  NewRoots.insert(NewF);
286  }
287 
288  ToReplace.push_back(std::make_pair(CI, NewF));
289  Replaced.insert(&F);
290 
291  Changed = true;
292  }
293 
294  while (!ToReplace.empty()) {
295  auto R = ToReplace.pop_back_val();
296  R.first->setCalledFunction(R.second);
297  }
298  }
299  } while (!NewRoots.empty());
300 
301  for (Function *F : Replaced) {
302  if (F->use_empty())
303  F->eraseFromParent();
304  }
305 
306  Roots.clear();
307  Clones.clear();
308 
309  return Changed;
310 }
311 
312 Function *
313 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314  const FnProperties &NewProps) {
315  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
316 
317  ValueToValueMapTy dummy;
318  Function *NewF = CloneFunction(&F, dummy);
319  setFeatures(*NewF, NewProps.Features);
320  setAttributes(*NewF, NewProps.Attributes);
323 
324  // Swap names. If that is the only clone it will retain the name of now
325  // dead value. Preserve original name for externally visible functions.
326  if (F.hasName() && F.hasLocalLinkage()) {
327  std::string NewName = std::string(NewF->getName());
328  NewF->takeName(&F);
329  F.setName(NewName);
330  }
331 
332  return NewF;
333 }
334 
335 void AMDGPUPropagateAttributes::setFeatures(Function &F,
336  const FeatureBitset &NewFeatures) {
337  std::string NewFeatureStr = getFeatureString(NewFeatures);
338 
339  LLVM_DEBUG(dbgs() << "Set features "
340  << getFeatureString(NewFeatures & TargetFeatures)
341  << " on " << F.getName() << '\n');
342 
343  F.removeFnAttr("target-features");
344  F.addFnAttr("target-features", NewFeatureStr);
345 }
346 
347 void AMDGPUPropagateAttributes::setAttributes(Function &F,
348  const ArrayRef<Optional<Attribute>> NewAttrs) {
349  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350  for (unsigned I = 0; I < NumAttr; ++I) {
351  F.removeFnAttr(AttributeNames[I]);
352  if (NewAttrs[I]) {
353  LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354  F.addFnAttr(*NewAttrs[I]);
355  }
356  }
357 }
358 
359 std::string
360 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
361 {
362  std::string Ret;
363  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
364  if (Features[KV.Value])
365  Ret += (StringRef("+") + KV.Key + ",").str();
366  else if (TargetFeatures[KV.Value])
367  Ret += (StringRef("-") + KV.Key + ",").str();
368  }
369  Ret.pop_back(); // Remove last comma.
370  return Ret;
371 }
372 
375  return false;
376 
377  return AMDGPUPropagateAttributes(TM, false).process(F);
378 }
379 
380 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
381  if (!TM)
382  return false;
383 
384  return AMDGPUPropagateAttributes(TM, true).process(M);
385 }
386 
389  return new AMDGPUPropagateAttributesEarly(TM);
390 }
391 
394  return new AMDGPUPropagateAttributesLate(TM);
395 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
uint64_t CallInst * C
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition: Casting.h:334
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
This class represents lattice values for constants.
Definition: AllocatorList.h:23
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:67
Function * getCaller()
Helper to get the caller (the parent function).
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:330
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1100
F(f)
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
const_iterator begin() const
Definition: SmallSet.h:223
const FeatureBitset & getFeatureBits() const
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:342
void clear()
Definition: SmallSet.h:218
Used to provide key value pairs for feature and CPU bit flags.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:348
const_iterator end() const
Definition: SmallSet.h:229
static bool runOnFunction(Function &F, bool PostInlining)
iterator_range< iterator > functions()
Definition: Module.h:621
Container class for subtarget features.
bool hasName() const
Definition: Value.h:250
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
bool isEntryFunctionCC(CallingConv::ID CC)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:219
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:420
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
iterator_range< user_iterator > users()
Definition: Value.h:418
Provides AMDGPU specific target descriptions.
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:270
#define I(x, y, z)
Definition: MD5.cpp:59
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:273
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
void removeFnAttr(Attribute::AttrKind Kind)
Remove function attributes from this function.
Definition: Function.h:252
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:227
aarch64 promote const
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
static const Function * getParent(const Value *V)
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:340
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:236
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2027
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool use_empty() const
Definition: Value.h:341
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164