LLVM  10.0.0svn
AMDGPUPropagateAttributes.cpp
Go to the documentation of this file.
1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Module.h"
39 #include <string>
40 
41 #define DEBUG_TYPE "amdgpu-propagate-attributes"
42 
43 using namespace llvm;
44 
45 namespace llvm {
// Table of subtarget feature key/value entries, defined outside this file.
// getFeatureString() iterates it and uses each entry's Value as a feature bit
// index and its Key as the feature name.
46 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
47 }
48 
49 namespace {
50 
// Shared implementation used by both passes declared in this file. Starting
// from a set of root functions it pushes the roots' wavefront-size target
// features down to the functions they call: either by rewriting the callee's
// "target-features" attribute in place, or (when AllowClone is set) by cloning
// the callee once per required feature set and redirecting the call sites.
51 class AMDGPUPropagateAttributes {
 // Mask of the features this pass propagates. All other feature bits of a
 // callee are preserved when its feature set is rebuilt in process().
52  const FeatureBitset TargetFeatures = {
53  AMDGPU::FeatureWavefrontSize16,
54  AMDGPU::FeatureWavefrontSize32,
55  AMDGPU::FeatureWavefrontSize64
56  };
57 
 // Record of one clone: NewF was cloned from OrigF for callers whose masked
 // feature bits equal FeatureMask.
58  class Clone{
59  public:
60  Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
61  FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
62 
63  FeatureBitset FeatureMask;
64  Function *OrigF;
65  Function *NewF;
66  };
67 
 // Target machine used to query the per-function subtarget feature bits.
68  const TargetMachine *TM;
69 
70  // Clone functions as needed or just set attributes.
71  bool AllowClone;
72 
73  // Option propagation roots.
 // NOTE(review): the declaration this comment belongs to (the Roots set used
 // by the process() overloads) is missing from this listing.
75 
76  // Clones of functions with their attributes.
 // NOTE(review): the declaration of the Clones container (read by
 // findFunction(), appended to by process()) is missing from this listing.
78 
79  // Find a clone with required features.
 // Returns nullptr when no clone of OrigF with exactly these features exists.
80  Function *findFunction(const FeatureBitset &FeaturesNeeded,
81  Function *OrigF);
82 
83  // Clone function F and set NewFeatures on the clone.
84  // Clone takes the name of original function.
85  Function *cloneWithFeatures(Function &F,
86  const FeatureBitset &NewFeatures);
87 
88  // Set new function's features in place.
89  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
90 
 // Render Features as a comma-separated "+name"/"-name" list suitable for the
 // "target-features" attribute.
91  std::string getFeatureString(const FeatureBitset &Features) const;
92 
93  // Propagate attributes from Roots.
 // Returns true if any function attribute or call site was changed.
94  bool process();
95 
96 public:
97  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
98  TM(TM), AllowClone(AllowClone) {}
99 
100  // Use F as a root and propagate its attributes.
101  bool process(Function &F);
102 
103  // Propagate attributes starting from kernel functions.
 // Roots are the functions whose calling convention is an entry-function CC.
104  bool process(Module &M);
105 };
106 
107 // Allows propagating attributes early, but no cloning is allowed as it must
108 // be a function pass to run before any optimizations.
109 // TODO: We should only need one instance of the module pass, but that needs to be
110 // in the linker pipeline which is currently not possible.
111 class AMDGPUPropagateAttributesEarly : public FunctionPass {
 // Target machine forwarded to AMDGPUPropagateAttributes; defaults to nullptr
 // through the constructor's default argument.
112  const TargetMachine *TM;
113 
114 public:
115  static char ID; // Pass identification
116 
117  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
118  FunctionPass(ID), TM(TM) {
 // NOTE(review): the constructor body lines are missing from this listing.
121  }
122 
 // Runs the propagation with cloning disabled, using F as the only root.
123  bool runOnFunction(Function &F) override;
124 };
125 
126 // Allows propagating attributes with cloning, but does that late in the
127 // pipeline.
128 class AMDGPUPropagateAttributesLate : public ModulePass {
 // Target machine forwarded to AMDGPUPropagateAttributes; runOnModule() is a
 // no-op when this is null (the constructor's default).
129  const TargetMachine *TM;
130 
131 public:
132  static char ID; // Pass identification
133 
134  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
135  ModulePass(ID), TM(TM) {
 // NOTE(review): the constructor body lines are missing from this listing.
138  }
139 
 // Runs the propagation with cloning enabled, rooted at the module's entry
 // functions.
140  bool runOnModule(Module &M) override;
141 };
142 
143 } // end anonymous namespace.
144 
147 
// Pass registration for the early (function) and late (module) variants. Each
// invocation names the pass class, its command-line style identifier and a
// human-readable description.
// NOTE(review): the two trailing `false` flags are presumably the CFG-only and
// is-analysis flags of INITIALIZE_PASS -- confirm against the macro definition.
148 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
149  "amdgpu-propagate-attributes-early",
150  "Early propagate attributes from kernels to functions",
151  false, false)
152 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
153  "amdgpu-propagate-attributes-late",
154  "Late propagate attributes from kernels to functions",
155  false, false)
156 
157 Function *
158 AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
159  Function *OrigF) {
160  // TODO: search for clone's clones.
161  for (Clone &C : Clones)
162  if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
163  return C.NewF;
164 
165  return nullptr;
166 }
167 
168 bool AMDGPUPropagateAttributes::process(Module &M) {
169  for (auto &F : M.functions())
170  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
171  Roots.insert(&F);
172 
173  return process();
174 }
175 
176 bool AMDGPUPropagateAttributes::process(Function &F) {
177  Roots.insert(&F);
178  return process();
179 }
180 
// Propagate the TargetFeatures bits from the current Roots to the functions
// they call, repeating until a round discovers no new roots.
//
// For each defined non-root function, every call site located in a root is
// examined. If caller and callee already agree on the masked features the
// callee simply becomes a root for the next round. Otherwise the callee's
// features are either overwritten in place (AllowClone == false) or a clone
// with the caller's features is found/created and the call site is redirected
// to it (AllowClone == true).
//
// Returns true if any attribute was rewritten or any call site was redirected;
// returns false immediately when there are no roots.
181 bool AMDGPUPropagateAttributes::process() {
182  bool Changed = false;
 // Functions discovered during the current round; merged into Roots at the
 // start of the next round.
183  SmallSet<Function *, 32> NewRoots;
 // Original functions that had at least one call site redirected to a clone;
 // erased at the end if they end up with no uses.
184  SmallSet<Function *, 32> Replaced;
185 
186  if (Roots.empty())
187  return false;
 // The module is taken from the first root's parent.
188  Module &M = *(*Roots.begin())->getParent();
189 
190  do {
191  Roots.insert(NewRoots.begin(), NewRoots.end());
192  NewRoots.clear();
193 
194  for (auto &F : M.functions()) {
 // NOTE(review): Roots.count(&F) is tested twice here; the second test is
 // redundant as written -- confirm whether a different set was intended.
195  if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
196  continue;
197 
198  const FeatureBitset &CalleeBits =
199  TM->getSubtargetImpl(F)->getFeatureBits();
 // NOTE(review): a line is missing from this listing here; presumably it
 // declares the ToReplace list of (call site, new callee) pairs used below.
201 
202  for (User *U : F.users()) {
 // NOTE(review): the line defining `I` from `U` is missing from this listing.
204  if (!I)
205  continue;
206  CallBase *CI = dyn_cast<CallBase>(I);
207  if (!CI)
208  continue;
209  Function *Caller = CI->getCaller();
210  if (!Caller)
211  continue;
 // Only call sites inside functions that are already roots propagate.
212  if (!Roots.count(Caller))
213  continue;
214 
 // Caller features restricted to the propagated mask.
215  const FeatureBitset &CallerBits =
216  TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
217 
 // Already compatible: no rewrite needed, but F becomes a root so that its
 // own callees are visited in the next round.
218  if (CallerBits == (CalleeBits & TargetFeatures)) {
219  NewRoots.insert(&F);
220  continue;
221  }
222 
 // Reuse an existing clone of F built for the same caller features, if any.
223  Function *NewF = findFunction(CallerBits, &F);
224  if (!NewF) {
 // Keep the callee's non-propagated bits and take the propagated bits from
 // the caller.
225  FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
226  CallerBits);
227  if (!AllowClone) {
228  // This may set different features on different iterations if
229  // there is a contradiction in callers' attributes. In this case
230  // we rely on a second pass running on Module, which is allowed
231  // to clone.
232  setFeatures(F, NewFeatures);
233  NewRoots.insert(&F);
234  Changed = true;
 // F was updated in place; stop scanning its remaining users.
235  break;
236  }
237 
238  NewF = cloneWithFeatures(F, NewFeatures);
239  Clones.push_back(Clone(CallerBits, &F, NewF));
240  NewRoots.insert(NewF);
241  }
242 
 // The call-site rewrite is deferred until the user loop has finished
 // (presumably so the use list is not modified while it is iterated).
243  ToReplace.push_back(std::make_pair(CI, NewF));
244  Replaced.insert(&F);
245 
246  Changed = true;
247  }
248 
 // Apply the deferred call-site redirections for this function.
249  while (!ToReplace.empty()) {
250  auto R = ToReplace.pop_back_val();
251  R.first->setCalledFunction(R.second);
252  }
253  }
254  } while (!NewRoots.empty());
255 
 // Drop originals whose every use has been redirected to a clone.
256  for (Function *F : Replaced) {
257  if (F->use_empty())
258  F->eraseFromParent();
259  }
260 
261  return Changed;
262 }
263 
// Clone F, give the clone the feature set NewFeatures, and swap the names of
// the two functions so that the clone carries F's original name.
//
// Returns the newly created clone.
264 Function *
265 AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
266  const FeatureBitset &NewFeatures) {
267  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
268 
 // The value map is required by CloneFunction but is not read afterwards.
269  ValueToValueMapTy dummy;
270  Function *NewF = CloneFunction(&F, dummy);
271  setFeatures(*NewF, NewFeatures);
272 
273  // Swap names. If that is the only clone it will retain the name of now
274  // dead value.
275  if (F.hasName()) {
 // Save the clone's name in an owned string before it is overwritten by
 // takeName(), so it can be given to F afterwards.
276  std::string NewName = NewF->getName();
277  NewF->takeName(&F);
278  F.setName(NewName);
279 
280  // Name has changed, it does not need an external symbol.
 // NOTE(review): the statements that follow this comment are missing from
 // this listing; presumably they adjust F's linkage/visibility -- confirm
 // against the original source.
283  }
284 
285  return NewF;
286 }
287 
288 void AMDGPUPropagateAttributes::setFeatures(Function &F,
289  const FeatureBitset &NewFeatures) {
290  std::string NewFeatureStr = getFeatureString(NewFeatures);
291 
292  LLVM_DEBUG(dbgs() << "Set features "
293  << getFeatureString(NewFeatures & TargetFeatures)
294  << " on " << F.getName() << '\n');
295 
296  F.removeFnAttr("target-features");
297  F.addFnAttr("target-features", NewFeatureStr);
298 }
299 
300 std::string
301 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
302 {
303  std::string Ret;
304  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
305  if (Features[KV.Value])
306  Ret += (StringRef("+") + KV.Key + ",").str();
307  else if (TargetFeatures[KV.Value])
308  Ret += (StringRef("-") + KV.Key + ",").str();
309  }
310  Ret.pop_back(); // Remove last comma.
311  return Ret;
312 }
313 
316  return false;
317 
318  return AMDGPUPropagateAttributes(TM, false).process(F);
319 }
320 
321 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
322  if (!TM)
323  return false;
324 
325  return AMDGPUPropagateAttributes(TM, true).process(M);
326 }
327 
330  return new AMDGPUPropagateAttributesEarly(TM);
331 }
332 
335  return new AMDGPUPropagateAttributesLate(TM);
336 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:242
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
uint64_t CallInst * C
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
This class represents lattice values for constants.
Definition: AllocatorList.h:23
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
Function * getCaller()
Helper to get the caller (the parent function).
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1100
F(f)
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
const_iterator begin() const
Definition: SmallSet.h:223
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
void clear()
Definition: SmallSet.h:218
Used to provide key value pairs for feature and CPU bit flags.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
const_iterator end() const
Definition: SmallSet.h:229
static bool runOnFunction(Function &F, bool PostInlining)
iterator_range< iterator > functions()
Definition: Module.h:609
Container class for subtarget features.
bool hasName() const
Definition: Value.h:251
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
bool isEntryFunctionCC(CallingConv::ID CC)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
iterator_range< user_iterator > users()
Definition: Value.h:419
Provides AMDGPU specific target descriptions.
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:226
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
void removeFnAttr(Attribute::AttrKind Kind)
Remove function attributes from this function.
Definition: Function.h:245
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
aarch64 promote const
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
static const Function * getParent(const Value *V)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:229
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool use_empty() const
Definition: Value.h:342