LLVM  14.0.0git
AMDGPUPropagateAttributes.cpp
Go to the documentation of this file.
1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes the call graph and propagates ABI target features
16 /// through it.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
35 #include "llvm/IR/InstrTypes.h"
38 
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
40 
41 using namespace llvm;
42 
43 namespace llvm {
44 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
45 }
46 
47 namespace {
48 
49 // Target features to propagate.
50 static constexpr const FeatureBitset TargetFeatures = {
51  AMDGPU::FeatureWavefrontSize16,
52  AMDGPU::FeatureWavefrontSize32,
53  AMDGPU::FeatureWavefrontSize64
54 };
55 
56 // Attributes to propagate.
57 // TODO: Support conservative min/max merging instead of cloning.
58 static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
59 
60 static constexpr unsigned NumAttr =
61  sizeof(AttributeNames) / sizeof(AttributeNames[0]);
62 
63 class AMDGPUPropagateAttributes {
64 
65  class FnProperties {
66  private:
67  explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
68 
69  public:
70  explicit FnProperties(const TargetMachine &TM, const Function &F) {
71  Features = TM.getSubtargetImpl(F)->getFeatureBits();
72 
73  for (unsigned I = 0; I < NumAttr; ++I)
74  if (F.hasFnAttribute(AttributeNames[I]))
75  Attributes[I] = F.getFnAttribute(AttributeNames[I]);
76  }
77 
78  bool operator == (const FnProperties &Other) const {
79  if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
80  return false;
81  for (unsigned I = 0; I < NumAttr; ++I)
82  if (Attributes[I] != Other.Attributes[I])
83  return false;
84  return true;
85  }
86 
87  FnProperties adjustToCaller(const FnProperties &CallerProps) const {
88  FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
89  for (unsigned I = 0; I < NumAttr; ++I)
90  New.Attributes[I] = CallerProps.Attributes[I];
91  return New;
92  }
93 
94  FeatureBitset Features;
96  };
97 
98  class Clone {
99  public:
100  Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
101  Properties(Props), OrigF(OrigF), NewF(NewF) {}
102 
103  FnProperties Properties;
104  Function *OrigF;
105  Function *NewF;
106  };
107 
108  const TargetMachine *TM;
109 
110  // Clone functions as needed or just set attributes.
111  bool AllowClone;
112 
113  // Option propagation roots.
115 
116  // Clones of functions with their attributes.
117  SmallVector<Clone, 32> Clones;
118 
119  // Find a clone with required features.
120  Function *findFunction(const FnProperties &PropsNeeded,
121  Function *OrigF);
122 
123  // Clone function \p F and set \p NewProps on the clone.
124  // Cole takes the name of original function.
125  Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
126 
127  // Set new function's features in place.
128  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
129 
130  // Set new function's attributes in place.
131  void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
132 
133  std::string getFeatureString(const FeatureBitset &Features) const;
134 
135  // Propagate attributes from Roots.
136  bool process();
137 
138 public:
139  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
140  TM(TM), AllowClone(AllowClone) {}
141 
142  // Use F as a root and propagate its attributes.
143  bool process(Function &F);
144 
145  // Propagate attributes starting from kernel functions.
146  bool process(Module &M);
147 };
148 
149 // Allows to propagate attributes early, but no cloning is allowed as it must
150 // be a function pass to run before any optimizations.
151 // TODO: We shall only need a one instance of module pass, but that needs to be
152 // in the linker pipeline which is currently not possible.
153 class AMDGPUPropagateAttributesEarly : public FunctionPass {
154  const TargetMachine *TM;
155 
156 public:
157  static char ID; // Pass identification
158 
159  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
160  FunctionPass(ID), TM(TM) {
163  }
164 
165  bool runOnFunction(Function &F) override;
166 };
167 
168 // Allows to propagate attributes with cloning but does that late in the
169 // pipeline.
170 class AMDGPUPropagateAttributesLate : public ModulePass {
171  const TargetMachine *TM;
172 
173 public:
174  static char ID; // Pass identification
175 
176  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
177  ModulePass(ID), TM(TM) {
180  }
181 
182  bool runOnModule(Module &M) override;
183 };
184 
185 } // end anonymous namespace.
186 
189 
190 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
191  "amdgpu-propagate-attributes-early",
192  "Early propagate attributes from kernels to functions",
193  false, false)
194 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
196  "Late propagate attributes from kernels to functions",
198 
199 Function *
200 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
201  Function *OrigF) {
202  // TODO: search for clone's clones.
203  for (Clone &C : Clones)
204  if (C.OrigF == OrigF && PropsNeeded == C.Properties)
205  return C.NewF;
206 
207  return nullptr;
208 }
209 
210 bool AMDGPUPropagateAttributes::process(Module &M) {
211  for (auto &F : M.functions())
212  if (AMDGPU::isKernel(F.getCallingConv()))
213  Roots.insert(&F);
214 
215  return Roots.empty() ? false : process();
216 }
217 
218 bool AMDGPUPropagateAttributes::process(Function &F) {
219  Roots.insert(&F);
220  return process();
221 }
222 
223 bool AMDGPUPropagateAttributes::process() {
224  bool Changed = false;
225  SmallSet<Function *, 32> NewRoots;
226  SmallSet<Function *, 32> Replaced;
227 
228  assert(!Roots.empty());
229  Module &M = *(*Roots.begin())->getParent();
230 
231  do {
232  Roots.insert(NewRoots.begin(), NewRoots.end());
233  NewRoots.clear();
234 
235  for (auto &F : M.functions()) {
236  if (F.isDeclaration())
237  continue;
238 
239  const FnProperties CalleeProps(*TM, F);
241  SmallSet<CallBase *, 32> Visited;
242 
243  for (User *U : F.users()) {
244  Instruction *I = dyn_cast<Instruction>(U);
245  if (!I)
246  continue;
247  CallBase *CI = dyn_cast<CallBase>(I);
248  // Only propagate attributes if F is the called function. Specifically,
249  // do not propagate attributes if F is passed as an argument.
250  // FIXME: handle bitcasted callee, e.g.
251  // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
252  if (!CI || CI->getCalledOperand() != &F)
253  continue;
254  Function *Caller = CI->getCaller();
255  if (!Caller || !Visited.insert(CI).second)
256  continue;
257  if (!Roots.count(Caller) && !NewRoots.count(Caller))
258  continue;
259 
260  const FnProperties CallerProps(*TM, *Caller);
261 
262  if (CalleeProps == CallerProps) {
263  if (!Roots.count(&F))
264  NewRoots.insert(&F);
265  continue;
266  }
267 
268  Function *NewF = findFunction(CallerProps, &F);
269  if (!NewF) {
270  const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
271  if (!AllowClone) {
272  // This may set different features on different iterations if
273  // there is a contradiction in callers' attributes. In this case
274  // we rely on a second pass running on Module, which is allowed
275  // to clone.
276  setFeatures(F, NewProps.Features);
277  setAttributes(F, NewProps.Attributes);
278  NewRoots.insert(&F);
279  Changed = true;
280  break;
281  }
282 
283  NewF = cloneWithProperties(F, NewProps);
284  Clones.push_back(Clone(CallerProps, &F, NewF));
285  NewRoots.insert(NewF);
286  }
287 
288  ToReplace.push_back(std::make_pair(CI, NewF));
289  Replaced.insert(&F);
290 
291  Changed = true;
292  }
293 
294  while (!ToReplace.empty()) {
295  auto R = ToReplace.pop_back_val();
296  R.first->setCalledFunction(R.second);
297  }
298  }
299  } while (!NewRoots.empty());
300 
301  for (Function *F : Replaced) {
302  if (F->use_empty())
303  F->eraseFromParent();
304  }
305 
306  Roots.clear();
307  Clones.clear();
308 
309  return Changed;
310 }
311 
312 Function *
313 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314  const FnProperties &NewProps) {
315  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
316 
317  ValueToValueMapTy dummy;
318  Function *NewF = CloneFunction(&F, dummy);
319  setFeatures(*NewF, NewProps.Features);
320  setAttributes(*NewF, NewProps.Attributes);
323 
324  // Swap names. If that is the only clone it will retain the name of now
325  // dead value. Preserve original name for externally visible functions.
326  if (F.hasName() && F.hasLocalLinkage()) {
327  std::string NewName = std::string(NewF->getName());
328  NewF->takeName(&F);
329  F.setName(NewName);
330  }
331 
332  return NewF;
333 }
334 
335 void AMDGPUPropagateAttributes::setFeatures(Function &F,
336  const FeatureBitset &NewFeatures) {
337  std::string NewFeatureStr = getFeatureString(NewFeatures);
338 
339  LLVM_DEBUG(dbgs() << "Set features "
340  << getFeatureString(NewFeatures & TargetFeatures)
341  << " on " << F.getName() << '\n');
342 
343  F.removeFnAttr("target-features");
344  F.addFnAttr("target-features", NewFeatureStr);
345 }
346 
347 void AMDGPUPropagateAttributes::setAttributes(Function &F,
348  const ArrayRef<Optional<Attribute>> NewAttrs) {
349  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350  for (unsigned I = 0; I < NumAttr; ++I) {
351  F.removeFnAttr(AttributeNames[I]);
352  if (NewAttrs[I]) {
353  LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354  F.addFnAttr(*NewAttrs[I]);
355  }
356  }
357 }
358 
359 std::string
360 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
361 {
362  std::string Ret;
363  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
364  if (Features[KV.Value])
365  Ret += (StringRef("+") + KV.Key + ",").str();
366  else if (TargetFeatures[KV.Value])
367  Ret += (StringRef("-") + KV.Key + ",").str();
368  }
369  Ret.pop_back(); // Remove last comma.
370  return Ret;
371 }
372 
374  if (!TM) {
375  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
376  if (!TPC)
377  return false;
378 
379  TM = &TPC->getTM<TargetMachine>();
380  }
381 
382  if (!AMDGPU::isKernel(F.getCallingConv()))
383  return false;
384 
385  return AMDGPUPropagateAttributes(TM, false).process(F);
386 }
387 
388 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
389  if (!TM) {
390  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
391  if (!TPC)
392  return false;
393 
394  TM = &TPC->getTM<TargetMachine>();
395  }
396 
397  return AMDGPUPropagateAttributes(TM, true).process(M);
398 }
399 
402  return new AMDGPUPropagateAttributesEarly(TM);
403 }
404 
407  return new AMDGPUPropagateAttributesLate(TM);
408 }
409 
413  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
414  return PreservedAnalyses::all();
415 
416  return AMDGPUPropagateAttributes(&TM, false).process(F)
419 }
420 
423  return AMDGPUPropagateAttributes(&TM, true).process(M)
426 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
functions
amdgpu propagate attributes Late propagate attributes from kernels to functions
Definition: AMDGPUPropagateAttributes.cpp:196
llvm::AMDGPUPropagateAttributesEarlyPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPUPropagateAttributes.cpp:411
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function::empty
bool empty() const
Definition: Function.h:729
llvm::Function
Definition: Function.h:62
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
to
Should compile to
Definition: README.txt:449
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::AMDGPUFeatureKV
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
llvm::Optional
Definition: APInt.h:33
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
attributes
Deduce and propagate attributes
Definition: Attributor.cpp:3111
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:644
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:723
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
false
Definition: StackSlotColoring.cpp:142
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:406
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::SubtargetFeatureKV
Used to provide key value pairs for feature and CPU bit flags.
Definition: MCSubtargetInfo.h:34
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:282
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1392
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
AMDGPUMCTargetDesc.h
false
amdgpu propagate attributes Late propagate attributes from kernels to false
Definition: AMDGPUPropagateAttributes.cpp:197
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
Attributes
AMDGPU Kernel Attributes
Definition: AMDGPULowerKernelAttributes.cpp:254
propagate
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
Definition: CFLAndersAliasAnalysis.cpp:596
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:460
TargetPassConfig.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
llvm::SmallSet::begin
const_iterator begin() const
Definition: SmallSet.h:223
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1986
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:870
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
TargetSubtargetInfo.h
llvm::CloneFunction
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Definition: CloneFunction.cpp:283
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::ValueMap< const Value *, WeakTrackingVH >
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:401
llvm::AMDGPUPropagateAttributesLatePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPUPropagateAttributes.cpp:422
llvm::SmallSet::end
const_iterator end() const
Definition: SmallSet.h:229
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1391
late
amdgpu propagate attributes late
Definition: AMDGPUPropagateAttributes.cpp:195
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
llvm::SmallSet::empty
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1176
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:382
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1198
AMDGPUBaseInfo.h
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:359