39#define DEBUG_TYPE "amdgpu-propagate-attributes"
51 AMDGPU::FeatureWavefrontSize16,
52 AMDGPU::FeatureWavefrontSize32,
53 AMDGPU::FeatureWavefrontSize64
58static constexpr const char *AttributeNames[] = {
"amdgpu-waves-per-eu"};
60static constexpr unsigned NumAttr = std::size(AttributeNames);
62class AMDGPUPropagateAttributes {
66 explicit FnProperties(
const FeatureBitset &&FB) : Features(FB) {}
70 Features =
TM.getSubtargetImpl(
F)->getFeatureBits();
72 for (
unsigned I = 0;
I < NumAttr; ++
I)
73 if (
F.hasFnAttribute(AttributeNames[
I]))
78 if ((Features & TargetFeatures) != (
Other.Features & TargetFeatures))
80 for (
unsigned I = 0;
I < NumAttr; ++
I)
81 if (Attributes[
I] !=
Other.Attributes[
I])
86 FnProperties adjustToCaller(
const FnProperties &CallerProps)
const {
87 FnProperties
New((Features & ~TargetFeatures) | CallerProps.Features);
88 for (
unsigned I = 0;
I < NumAttr; ++
I)
89 New.Attributes[
I] = CallerProps.Attributes[
I];
100 Properties(Props), OrigF(OrigF), NewF(NewF) {}
102 FnProperties Properties;
119 Function *findFunction(
const FnProperties &PropsNeeded,
131 const ArrayRef<std::optional<Attribute>> NewAttrs);
133 std::string getFeatureString(
const FeatureBitset &Features)
const;
139 AMDGPUPropagateAttributes(
const TargetMachine *TM,
bool AllowClone) :
140 TM(
TM), AllowClone(AllowClone) {}
153class AMDGPUPropagateAttributesEarly :
public FunctionPass {
159 AMDGPUPropagateAttributesEarly(
const TargetMachine *TM =
nullptr) :
170class AMDGPUPropagateAttributesLate :
public ModulePass {
176 AMDGPUPropagateAttributesLate(
const TargetMachine *TM =
nullptr) :
187char AMDGPUPropagateAttributesEarly::ID = 0;
188char AMDGPUPropagateAttributesLate::ID = 0;
191 "amdgpu-propagate-attributes-early",
192 "Early propagate attributes from kernels to functions",
195 "amdgpu-propagate-attributes-
late",
200AMDGPUPropagateAttributes::findFunction(
const FnProperties &PropsNeeded,
203 for (Clone &
C : Clones)
204 if (
C.OrigF == OrigF && PropsNeeded ==
C.Properties)
210bool AMDGPUPropagateAttributes::process(
Module &M) {
211 for (
auto &
F :
M.functions())
215 return Roots.empty() ?
false : process();
218bool AMDGPUPropagateAttributes::process(
Function &
F) {
223bool AMDGPUPropagateAttributes::process() {
224 bool Changed =
false;
235 for (
auto &
F :
M.functions()) {
236 if (
F.isDeclaration())
239 const FnProperties CalleeProps(*
TM,
F);
243 for (
User *U :
F.users()) {
255 if (!Caller || !Visited.
insert(CI).second)
257 if (!Roots.count(Caller) && !NewRoots.
count(Caller))
260 const FnProperties CallerProps(*
TM, *Caller);
262 if (CalleeProps == CallerProps) {
263 if (!Roots.count(&
F))
268 Function *NewF = findFunction(CallerProps, &
F);
270 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
276 setFeatures(
F, NewProps.Features);
277 setAttributes(
F, NewProps.Attributes);
283 NewF = cloneWithProperties(
F, NewProps);
284 Clones.push_back(Clone(CallerProps, &
F, NewF));
288 ToReplace.
push_back(std::pair(CI, NewF));
294 while (!ToReplace.
empty()) {
296 R.first->setCalledFunction(
R.second);
299 }
while (!NewRoots.
empty());
303 F->eraseFromParent();
313AMDGPUPropagateAttributes::cloneWithProperties(
Function &
F,
314 const FnProperties &NewProps) {
319 setFeatures(*NewF, NewProps.Features);
320 setAttributes(*NewF, NewProps.Attributes);
326 if (
F.hasName() &&
F.hasLocalLinkage()) {
327 std::string NewName = std::string(NewF->
getName());
335void AMDGPUPropagateAttributes::setFeatures(
Function &
F,
337 std::string NewFeatureStr = getFeatureString(NewFeatures);
340 << getFeatureString(NewFeatures & TargetFeatures)
341 <<
" on " <<
F.getName() <<
'\n');
343 F.removeFnAttr(
"target-features");
344 F.addFnAttr(
"target-features", NewFeatureStr);
347void AMDGPUPropagateAttributes::setAttributes(
350 for (
unsigned I = 0;
I < NumAttr; ++
I) {
351 F.removeFnAttr(AttributeNames[
I]);
354 F.addFnAttr(*NewAttrs[
I]);
360AMDGPUPropagateAttributes::getFeatureString(
const FeatureBitset &Features)
const
364 if (Features[KV.Value])
366 else if (TargetFeatures[KV.Value])
373bool AMDGPUPropagateAttributesEarly::runOnFunction(
Function &
F) {
375 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
385 return AMDGPUPropagateAttributes(
TM,
false).process(
F);
388bool AMDGPUPropagateAttributesLate::runOnModule(
Module &M) {
390 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
397 return AMDGPUPropagateAttributes(
TM,
true).process(M);
402 return new AMDGPUPropagateAttributesEarly(
TM);
407 return new AMDGPUPropagateAttributesLate(
TM);
416 return AMDGPUPropagateAttributes(&
TM,
false).process(
F)
423 return AMDGPUPropagateAttributes(&
TM,
true).process(M)
Provides AMDGPU specific target descriptions.
amdgpu propagate attributes Late propagate attributes from kernels to false
amdgpu propagate attributes late
amdgpu propagate attributes Late propagate attributes from kernels to functions
static const Function * getParent(const Value *V)
std::optional< std::vector< StOtherPiece > > Other
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
Target-Independent Code Generator Pass Configuration Options pass.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Value * getCalledOperand() const
Function * getCaller()
Helper to get the caller (the parent function).
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
void setLinkage(LinkageTypes LT)
@ DefaultVisibility
The GV is visible.
void setVisibility(VisibilityTypes V)
@ InternalLinkage
Rename collisions when linking (static functions).
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
const_iterator begin() const
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
const_iterator end() const
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Used to provide key value pairs for feature and CPU bit flags.