LLVM 20.0.0git
AMDGPURemoveIncompatibleFunctions.cpp
Go to the documentation of this file.
1//===-- AMDGPURemoveIncompatibleFunctions.cpp -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass replaces all uses of functions that use GPU features
11/// incompatible with the current GPU with null then deletes the function.
12//
13//===----------------------------------------------------------------------===//
14
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
22
23#define DEBUG_TYPE "amdgpu-remove-incompatible-functions"
24
25using namespace llvm;
26
27namespace llvm {
28extern const SubtargetFeatureKV
29 AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures - 1];
30} // namespace llvm
31
32namespace {
33
34using Generation = AMDGPUSubtarget::Generation;
35
36class AMDGPURemoveIncompatibleFunctions : public ModulePass {
37public:
38 static char ID;
39
40 AMDGPURemoveIncompatibleFunctions(const TargetMachine *TM = nullptr)
41 : ModulePass(ID), TM(TM) {
42 assert(TM && "No TargetMachine!");
43 }
44
45 StringRef getPassName() const override {
46 return "AMDGPU Remove Incompatible Functions";
47 }
48
49 void getAnalysisUsage(AnalysisUsage &AU) const override {}
50
51 /// Checks a single function, returns true if the function must be deleted.
52 bool checkFunction(Function &F);
53
54 bool runOnModule(Module &M) override {
55 assert(TM->getTargetTriple().isAMDGCN());
56
58 for (Function &F : M) {
59 if (checkFunction(F))
60 FnsToDelete.push_back(&F);
61 }
62
63 for (Function *F : FnsToDelete) {
64 F->replaceAllUsesWith(ConstantPointerNull::get(F->getType()));
65 F->eraseFromParent();
66 }
67 return !FnsToDelete.empty();
68 }
69
70private:
71 const TargetMachine *TM = nullptr;
72};
73
74StringRef getFeatureName(unsigned Feature) {
75 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV)
76 if (Feature == KV.Value)
77 return KV.Key;
78
79 llvm_unreachable("Unknown Target feature");
80}
81
82const SubtargetSubTypeKV *getGPUInfo(const GCNSubtarget &ST,
83 StringRef GPUName) {
84 for (const SubtargetSubTypeKV &KV : ST.getAllProcessorDescriptions())
85 if (StringRef(KV.Key) == GPUName)
86 return &KV;
87
88 return nullptr;
89}
90
91constexpr unsigned FeaturesToCheck[] = {AMDGPU::FeatureGFX11Insts,
92 AMDGPU::FeatureGFX10Insts,
93 AMDGPU::FeatureGFX9Insts,
94 AMDGPU::FeatureGFX8Insts,
95 AMDGPU::FeatureDPP,
96 AMDGPU::Feature16BitInsts,
97 AMDGPU::FeatureDot1Insts,
98 AMDGPU::FeatureDot2Insts,
99 AMDGPU::FeatureDot3Insts,
100 AMDGPU::FeatureDot4Insts,
101 AMDGPU::FeatureDot5Insts,
102 AMDGPU::FeatureDot6Insts,
103 AMDGPU::FeatureDot7Insts,
104 AMDGPU::FeatureDot8Insts,
105 AMDGPU::FeatureExtendedImageInsts,
106 AMDGPU::FeatureSMemRealTime,
107 AMDGPU::FeatureSMemTimeInst,
108 AMDGPU::FeatureGWS};
109
110FeatureBitset expandImpliedFeatures(const FeatureBitset &Features) {
111 FeatureBitset Result = Features;
112 for (const SubtargetFeatureKV &FE : AMDGPUFeatureKV) {
113 if (Features.test(FE.Value) && FE.Implies.any())
114 Result |= expandImpliedFeatures(FE.Implies.getAsBitset());
115 }
116 return Result;
117}
118
119void reportFunctionRemoved(Function &F, unsigned Feature) {
121 ORE.emit([&]() {
122 // Note: we print the function name as part of the diagnostic because if
123 // debug info is not present, users get "<unknown>:0:0" as the debug
124 // loc. If we didn't print the function name there would be no way to
125 // tell which function got removed.
126 return OptimizationRemark(DEBUG_TYPE, "AMDGPUIncompatibleFnRemoved", &F)
127 << "removing function '" << F.getName() << "': +"
128 << getFeatureName(Feature)
129 << " is not supported on the current target";
130 });
131}
132} // end anonymous namespace
133
134bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
135 if (F.isDeclaration())
136 return false;
137
138 const GCNSubtarget *ST =
139 static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
140
141 // Check the GPU isn't generic or generic-hsa. Generic is used for testing
142 // only and we don't want this pass to interfere with it.
143 StringRef GPUName = ST->getCPU();
144 if (GPUName.empty() || GPUName.starts_with("generic"))
145 return false;
146
147 // Try to fetch the GPU's info. If we can't, it's likely an unknown processor
148 // so just bail out.
149 const SubtargetSubTypeKV *GPUInfo = getGPUInfo(*ST, GPUName);
150 if (!GPUInfo)
151 return false;
152
153 // Get all the features implied by the current GPU, and recursively expand
154 // the features that imply other features.
155 //
156 // e.g. GFX90A implies FeatureGFX9, and FeatureGFX9 implies a whole set of
157 // other features.
158 const FeatureBitset GPUFeatureBits =
159 expandImpliedFeatures(GPUInfo->Implies.getAsBitset());
160
161 // Now that the have a FeatureBitset containing all possible features for
162 // the chosen GPU, check our list of "suspicious" features.
163
164 // Check that the user didn't enable any features that aren't part of that
165 // GPU's feature set. We only check a predetermined set of features.
166 for (unsigned Feature : FeaturesToCheck) {
167 if (ST->hasFeature(Feature) && !GPUFeatureBits.test(Feature)) {
168 reportFunctionRemoved(F, Feature);
169 return true;
170 }
171 }
172
173 // Delete FeatureWavefrontSize32 functions for
174 // gfx9 and below targets that don't support the mode.
175 // gfx10+ is implied to support both wave32 and 64 features.
176 // They are not in the feature set. So, we need a separate check
177 if (ST->getGeneration() < AMDGPUSubtarget::GFX10 &&
178 ST->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
179 reportFunctionRemoved(F, AMDGPU::FeatureWavefrontSize32);
180 return true;
181 }
182 return false;
183}
184
185INITIALIZE_PASS(AMDGPURemoveIncompatibleFunctions, DEBUG_TYPE,
186 "AMDGPU Remove Incompatible Functions", false, false)
187
188char AMDGPURemoveIncompatibleFunctions::ID = 0;
189
192 return new AMDGPURemoveIncompatibleFunctions(TM);
193}
aarch64 promote const
AMD GCN specific subclass of TargetSubtarget.
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Represent the analysis usage information of a pass.
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1826
const FeatureBitset & getAsBitset() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures - 1]
ModulePass * createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *)
Used to provide key value pairs for feature and CPU bit flags.
Used to provide key value pairs for feature and CPU bit flags.
FeatureBitArray Implies
K-V bit mask.