Go to the documentation of this file.
48 #define DEBUG_TYPE "amdgpu-lower-module-lds"
53 "amdgpu-super-align-lds-globals",
54 cl::desc(
"Increase alignment of LDS if it is not on align boundary"),
64 UsedList.
insert(TmpVec.begin(), TmpVec.end());
68 UsedList.
insert(TmpVec.begin(), TmpVec.end());
73 class AMDGPULowerModuleLDS :
public ModulePass {
84 for (
auto &
Op : CA->operands()) {
87 if (!
ToRemove.contains(
C->stripPointerCasts())) {
92 if (
Init.size() == CA->getNumOperands()) {
99 C->removeDeadConstantUsers();
113 removeFromUsedLists(
Module &M,
114 const std::vector<GlobalVariable *> &LocalVars) {
119 removeFromUsedList(M,
"llvm.used", LocalVarsSet);
120 removeFromUsedList(M,
"llvm.compiler.used", LocalVarsSet);
141 Builder.SetInsertPoint(
Func->getEntryBlock().getFirstNonPHI());
151 Builder.CreateCall(FTy, Decl, {},
166 bool runOnModule(
Module &M)
override {
168 UsedList = getUsedList(M);
169 bool Changed = superAlignLDSGlobals(M);
170 Changed |= processUsedLDS(CG, M);
173 if (
F.isDeclaration())
179 Changed |= processUsedLDS(CG, M, &
F);
189 static bool superAlignLDSGlobals(
Module &M) {
191 bool Changed =
false;
196 for (
auto &GV :
M.globals()) {
212 }
else if (GVSize > 4) {
215 }
else if (GVSize > 2) {
218 }
else if (GVSize > 1) {
236 std::vector<GlobalVariable *> FoundLocalVars =
239 if (FoundLocalVars.empty()) {
245 LayoutFields.
reserve(FoundLocalVars.size());
254 std::vector<GlobalVariable *> LocalVars;
255 LocalVars.reserve(FoundLocalVars.size());
259 for (
size_t I = 0;
I < LayoutFields.size();
I++) {
261 const_cast<void *
>(LayoutFields[
I].Id));
262 Align DataAlign = LayoutFields[
I].Alignment;
265 if (
uint64_t Rem = CurrentOffset % DataAlignV) {
280 LocalVars.push_back(FGV);
281 CurrentOffset += LayoutFields[
I].Size;
285 std::vector<Type *> LocalVarTypes;
286 LocalVarTypes.
reserve(LocalVars.size());
288 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
292 F ? (
Twine(
"llvm.amdgcn.kernel.") +
F->getName() +
".lds").str()
293 :
"llvm.amdgcn.module.lds");
313 removeFromUsedLists(M, LocalVars);
319 if (LocalVars.size() > 1) {
321 AliasScopes.
reserve(LocalVars.size());
323 for (
size_t I = 0;
I < LocalVars.size();
I++) {
325 AliasScopes.push_back(
Scope);
327 NoAliasList.
append(&AliasScopes[1], AliasScopes.end());
333 for (
size_t I = 0;
I < LocalVars.size();
I++) {
349 return I &&
I->getFunction() ==
F;
363 NoAliasList[
I - 1] = AliasScopes[
I - 1];
365 NoAliasList.empty() ? nullptr :
MDNode::get(Ctx, NoAliasList);
367 AliasScopes.empty() ? nullptr :
MDNode::get(Ctx, {AliasScopes[
I]});
369 refineUsesAlignmentAndAA(
GEP, A,
DL, AliasScope, NoAlias);
380 const bool CalleesRequireModuleLDS =
N->size() > 0;
382 if (CalleesRequireModuleLDS) {
386 markUsedByKernel(
Builder, &Func, SGV);
391 Func.addFnAttr(
"amdgpu-elide-module-lds");
402 if (!
MaxDepth || (A == 1 && !AliasScope))
406 if (
auto *
I = dyn_cast<Instruction>(U)) {
407 if (AliasScope &&
I->mayReadOrWriteMemory()) {
408 MDNode *AS =
I->getMetadata(LLVMContext::MD_alias_scope);
411 I->setMetadata(LLVMContext::MD_alias_scope, AS);
413 MDNode *NA =
I->getMetadata(LLVMContext::MD_noalias);
415 I->setMetadata(LLVMContext::MD_noalias, NA);
419 if (
auto *LI = dyn_cast<LoadInst>(U)) {
420 LI->setAlignment(
std::max(A, LI->getAlign()));
423 if (
auto *
SI = dyn_cast<StoreInst>(U)) {
424 if (
SI->getPointerOperand() == Ptr)
428 if (
auto *AI = dyn_cast<AtomicRMWInst>(U)) {
431 if (AI->getPointerOperand() == Ptr)
432 AI->setAlignment(
std::max(A, AI->getAlign()));
435 if (
auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
436 if (AI->getPointerOperand() == Ptr)
437 AI->setAlignment(
std::max(A, AI->getAlign()));
440 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(U)) {
441 unsigned BitWidth =
DL.getIndexTypeSizeInBits(
GEP->getType());
443 if (
GEP->getPointerOperand() == Ptr) {
445 if (
GEP->accumulateConstantOffset(
DL, Off))
447 refineUsesAlignmentAndAA(
GEP, GA,
DL, AliasScope, NoAlias,
452 if (
auto *
I = dyn_cast<Instruction>(U)) {
453 if (
I->getOpcode() == Instruction::BitCast ||
454 I->getOpcode() == Instruction::AddrSpaceCast)
455 refineUsesAlignmentAndAA(
I, A,
DL, AliasScope, NoAlias,
MaxDepth - 1);
467 "Lower uses of LDS variables from non-kernel functions",
false,
471 return new AMDGPULowerModuleLDS();
std::vector< GlobalVariable * > findVariablesToLower(Module &M, const Function *F)
A set of analyses that are preserved following a run of a transformation pass.
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
This is an optimization pass for GlobalISel generic memory operations.
We currently emit: [instruction sequence using eax]. Perhaps this is what we really should generate? Is imull three or four cycles? [Alternative sequence using eax, eax.] The current instruction priority is based on pattern complexity. The former is more complex because it folds a load, so the latter will not be emitted. Perhaps we should use AddedComplexity to give LEA32r a higher priority? We should always try to match LEA first, since the LEA matching code does some estimate to determine whether the match is profitable. If we care more about code size, then imull is better: it's two bytes shorter than movl + leal. On a Pentium M...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
A parsed version of the target data layout string, and methods for querying it.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
char & AMDGPULowerModuleLDSID
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
ReachingDefAnalysis InstSet & ToRemove
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
The basic data container for the call graph of a Module of IR.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
The instances of the Type class are immutable: once they are created, they are never changed.
A container for an operand bundle being viewed as a set of values rather than a set of uses.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Class to represent array types.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
bool hasInitializer() const
Definitions have initializers, declarations don't.
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that global in a SmallVector and return the global itself.
LLVM_READNONE bool isKernel(CallingConv::ID CC)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
void setSection(StringRef S)
Change the section for this global.
(vector float) vec_cmpeq(*A, *B) C
ModulePass * createAMDGPULowerModuleLDSPass()
@ LOCAL_ADDRESS
Address space for local memory.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
User * getUser() const
Returns the User that contains this Use.
A node in the call graph for a module.
INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE, "Lower uses of LDS variables from non-kernel functions", false, false) ModulePass *llvm
@ InternalLinkage
Rename collisions when linking (static functions).
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This is an important base class in LLVM.
static MDNode * intersect(MDNode *A, MDNode *B)
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F)
Replace all uses of constant C with instructions in F.
This is an important class for using LLVM in a threaded context.
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
StandardInstrumentations SI(Debug, VerifyEach)
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A Module instance is used to store all the information related to an LLVM module.
Class for arbitrary precision integers.
@ AppendingLinkage
Special purpose, only applies to global arrays.
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
Class to represent struct types.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > SuperAlignLDSGlobals("amdgpu-super-align-lds-globals", cl::desc("Increase alignment of LDS if it is not on align boundary"), cl::init(true), cl::Hidden)
std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize the amount of space required.
static const unsigned MaxDepth
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
A constant value that is initialized with an expression using other constant values.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr unsigned BitWidth
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
instcombine should handle this transform
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static Type * getVoidTy(LLVMContext &C)
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
PointerType * getType() const
Global values are always pointers.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
A container for analyses that lazily runs them and caches their results.
Type * getValueType() const
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldReplace returns true for the given Use.
void setAlignment(MaybeAlign Align)
void reserve(size_type N)
bool isKernelCC(const Function *Func)
LLVM Value Representation.
iterator_range< user_iterator > users()
Class to represent function types.
A Use represents the edge between a Value definition and its users.
reference emplace_back(ArgTypes &&... Args)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.