#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-lower-module-lds"

static cl::opt<bool> SuperAlignLDSGlobals(
    "amdgpu-super-align-lds-globals",
    cl::desc("Increase alignment of LDS if it is not on align boundary"),
    cl::init(true), cl::Hidden);
enum class LoweringKind { module, table, kernel, hybrid };

static cl::opt<LoweringKind> LoweringKindLoc(
    "amdgpu-lower-module-lds-strategy",
    cl::init(LoweringKind::hybrid),
    cl::values(
        clEnumValN(LoweringKind::table, "table", "Lower via table lookup"),
        clEnumValN(LoweringKind::module, "module", "Lower via module struct"),
        clEnumValN(
            LoweringKind::kernel, "kernel",
            "Lower variables reachable from one kernel, otherwise abort"),
        clEnumValN(LoweringKind::hybrid, "hybrid",
                   "Lower via mixture of above strategies")));
template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
  llvm::sort(V, [](const auto *L, const auto *R) {
    return L->getName() < R->getName();
  });
  return {std::move(V)};
}
class AMDGPULowerModuleLDS {

  static void
  removeLocalVarsFromUsedLists(Module &M,
                               const DenseSet<GlobalVariable *> &LocalVars) {

    for (GlobalVariable *LocalVar : LocalVars)
      LocalVar->removeDeadConstantUsers();
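  // markUsedByKernel: emit a no-op use of the replacement struct in the
  // kernel's entry block so the kernel still allocates the LDS it represents.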
  static void markUsedByKernel(Function *Func, GlobalVariable *SGV) {
    BasicBlock *Entry = &Func->getEntryBlock();
    IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());

    Function *Decl = Intrinsic::getOrInsertDeclaration(
        Func->getParent(), Intrinsic::donothing, {});

    Value *UseInstance[1] = {
        Builder.CreateConstInBoundsGEP1_32(SGV->getValueType(), SGV, 0)};
  struct LDSVariableReplacement {
    GlobalVariable *SGV;
    DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
  };

  static Constant *getAddressesOfVariablesInKernel(

      auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);
      if (ConstantGepIt != LDSVarsToConstantGEP.end()) {
        Elements.push_back(elt);

    if (Variables.empty()) {

    const size_t NumberVariables = Variables.size();
    const size_t NumberKernels = kernels.size();

    std::vector<Constant *> overallConstantExprElts(NumberKernels);
    for (size_t i = 0; i < NumberKernels; i++) {
      auto Replacement = KernelToReplacement.find(kernels[i]);
      overallConstantExprElts[i] =
          (Replacement == KernelToReplacement.end())
              : getAddressesOfVariablesInKernel(
                    Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
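  // replaceUseWithTableLookup: rewrite one use of an LDS variable into a load
  // from the lookup table, indexed by the current kernel's id and the
  // variable's slot in the table.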
  static void replaceUseWithTableLookup(Module &M, IRBuilder<> &Builder,
                                        GlobalVariable *LookupTable,
                                        GlobalVariable *GV, Use &U,
                                        Value *OptionalIndex) {

    Value *tableKernelIndex = getTableLookupKernelIndex(M, I->getFunction());

    Builder.SetInsertPoint(I);

        ConstantInt::get(I32, 0),

    Value *Address = Builder.CreateInBoundsGEP(
        LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());

    Value *loaded = Builder.CreateLoad(I32, Address);
  void replaceUsesInInstructionsWithTableLookup(

    for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) {
      auto *GV = ModuleScopeVariables[Index];

        replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
                                  ConstantInt::get(I32, Index));

    if (VariableSet.empty())

    for (Function &Func : M.functions()) {
  chooseBestVariableForModuleStrategy(const DataLayout &DL,

      size_t UserCount = 0;

      CandidateTy() = default;

          : GV(GV), UserCount(UserCount), Size(AllocSize) {}

      if (UserCount < Other.UserCount) {

      if (UserCount > Other.UserCount) {

    CandidateTy MostUsed;

    for (auto &K : LDSVars) {
      if (K.second.size() <= 1) {

      CandidateTy Candidate(

      if (MostUsed < Candidate)
        MostUsed = Candidate;
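  // getTableLookupKernelIndex: create (and cache, one per function) a call to
  // the amdgcn.lds.kernel.id intrinsic at the start of the function.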
    auto [It, Inserted] = tableKernelIndexCache.try_emplace(F);

      auto InsertAt = F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();

      It->second = Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
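  // assignLDSKernelIDToEachKernel: number the kernels that need the lookup
  // table or dynamic LDS, in name order, and record each kernel's index as
  // "llvm.amdgcn.lds.kernel.id" metadata.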
  static std::vector<Function *> assignLDSKernelIDToEachKernel(

    std::vector<Function *> OrderedKernels;
    if (!KernelsThatAllocateTableLDS.empty() ||
        !KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

      for (Function &Func : M->functions()) {
        if (Func.isDeclaration())

        if (KernelsThatAllocateTableLDS.contains(&Func) ||
            KernelsThatIndirectlyAllocateDynamicLDS.contains(&Func)) {

          OrderedKernels.push_back(&Func);

      OrderedKernels = sortByName(std::move(OrderedKernels));

      if (OrderedKernels.size() > UINT32_MAX) {

      for (size_t i = 0; i < OrderedKernels.size(); i++) {

        OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id",

    return OrderedKernels;
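  // partitionVariablesIntoIndirectStrategies: place every indirectly accessed
  // LDS variable into exactly one bucket (module struct, lookup table,
  // single-kernel access, or dynamic) according to the selected strategy.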
  static void partitionVariablesIntoIndirectStrategies(

        LoweringKindLoc != LoweringKind::hybrid
            : chooseBestVariableForModuleStrategy(
                  M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);

            ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]

    for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {

      assert(K.second.size() != 0);

        DynamicVariables.insert(GV);

      switch (LoweringKindLoc) {
      case LoweringKind::module:
        ModuleScopeVariables.insert(GV);

      case LoweringKind::table:
        TableLookupVariables.insert(GV);

      case LoweringKind::kernel:
        if (K.second.size() == 1) {
          KernelAccessVariables.insert(GV);

              "cannot lower LDS '" + GV->getName() +
              "' to kernel access as it is reachable from multiple kernels");

      case LoweringKind::hybrid: {
        if (GV == HybridModuleRoot) {
          assert(K.second.size() != 1);
          ModuleScopeVariables.insert(GV);
        } else if (K.second.size() == 1) {
          KernelAccessVariables.insert(GV);
        } else if (K.second == HybridModuleRootKernels) {
          ModuleScopeVariables.insert(GV);

          TableLookupVariables.insert(GV);

    assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
               KernelAccessVariables.size() + DynamicVariables.size() ==
           LDSToKernelsThatNeedToAccessItIndirectly.size());
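  // lowerModuleScopeStructVariables: pack the module-scope bucket into one
  // llvm.amdgcn.module.lds struct placed at address zero and rewrite all uses
  // to address into it.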
    if (ModuleScopeVariables.empty()) {

    LDSVariableReplacement ModuleScopeReplacement =
        createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
                                     ModuleScopeVariables);

    recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);

    removeLocalVarsFromUsedLists(M, ModuleScopeVariables);

    replaceLDSVariablesWithStruct(
        M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {

    for (Function &Func : M.functions()) {

      if (KernelsThatAllocateModuleLDS.contains(&Func)) {
        replaceLDSVariablesWithStruct(
            M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {

        markUsedByKernel(&Func, ModuleScopeReplacement.SGV);

    return ModuleScopeReplacement.SGV;
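  // lowerKernelScopeStructVariables: build a llvm.amdgcn.kernel.<name>.lds
  // struct per kernel from the variables that kernel reaches, skipping any
  // variable already covered by the module-scope struct.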
  lowerKernelScopeStructVariables(

    for (Function &Func : M.functions()) {

          KernelUsedVariables.insert(v);

          KernelUsedVariables.insert(v);

      if (KernelsThatAllocateModuleLDS.contains(&Func)) {

          KernelUsedVariables.erase(v);

      if (KernelUsedVariables.empty()) {

      if (!Func.hasName()) {

      std::string VarName =
          (Twine("llvm.amdgcn.kernel.") + Func.getName() + ".lds").str();

          createLDSVariableReplacement(M, VarName, KernelUsedVariables);

        markUsedByKernel(&Func, Replacement.SGV);

      removeLocalVarsFromUsedLists(M, KernelUsedVariables);
      KernelToReplacement[&Func] = Replacement;

      replaceLDSVariablesWithStruct(
          M, KernelUsedVariables, Replacement, [&Func](Use &U) {

            return I && I->getFunction() == &Func;

    return KernelToReplacement;
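  // buildRepresentativeDynamicLDSInstance: create a single dynamic LDS marker
  // per kernel whose alignment is the maximum over all dynamic LDS variables
  // that kernel can reach.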
    Align MaxDynamicAlignment(1);

      MaxDynamicAlignment =

      UpdateMaxAlignment(GV);

      UpdateMaxAlignment(GV);

    N->setAlignment(MaxDynamicAlignment);
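  // lowerDynamicLDSVariables: point every dynamic LDS variable at the
  // representative instance of the calling kernel, using the
  // llvm.amdgcn.dynlds.offset.table lookup for accesses from non-kernel code.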
      std::vector<Function *> const &OrderedKernels) {

    if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {

      std::vector<Constant *> newDynamicLDS;

      for (auto &func : OrderedKernels) {

        if (KernelsThatIndirectlyAllocateDynamicLDS.contains(func)) {

              buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);

          KernelToCreatedDynamicLDS[func] = N;

          markUsedByKernel(func, N);

              emptyCharArray, N, ConstantInt::get(I32, 0), true);

      assert(OrderedKernels.size() == newDynamicLDS.size());

          "llvm.amdgcn.dynlds.offset.table", nullptr,

            replaceUseWithTableLookup(M, Builder, table, GV, U, nullptr);

    return KernelToCreatedDynamicLDS;
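  // runOnModule: the pass driver. Super-align LDS globals, partition the
  // indirectly accessed variables by strategy, materialise the module, kernel
  // and dynamic structs plus their lookup tables, then record absolute LDS
  // addresses and the "amdgpu-lds-size" attribute per kernel.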
  bool runOnModule(Module &M) {

    bool Changed = superAlignLDSGlobals(M);

          LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);

    partitionVariablesIntoIndirectStrategies(
        M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
        ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,

    DenseSet<Function *> const KernelsThatAllocateModuleLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        ModuleScopeVariables);

    DenseSet<Function *> const KernelsThatAllocateTableLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        TableLookupVariables);

    DenseSet<Function *> const KernelsThatIndirectlyAllocateDynamicLDS =
        kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
                                                        DynamicVariables);

    GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
        M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);

    DenseMap<Function *, LDSVariableReplacement> KernelToReplacement =
        lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
                                        KernelsThatAllocateModuleLDS,
                                        MaybeModuleScopeStruct);

    for (auto &GV : KernelAccessVariables) {
      auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
      assert(funcs.size() == 1);
      LDSVariableReplacement Replacement =
          KernelToReplacement[*(funcs.begin())];

      replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {

    std::vector<Function *> OrderedKernels =
        assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
                                      KernelsThatIndirectlyAllocateDynamicLDS);

    if (!KernelsThatAllocateTableLDS.empty()) {

      auto TableLookupVariablesOrdered =
          sortByName(std::vector<GlobalVariable *>(TableLookupVariables.begin(),
                                                   TableLookupVariables.end()));

          M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
      replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,

    DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
        lowerDynamicLDSVariables(M, LDSUsesInfo,
                                 KernelsThatIndirectlyAllocateDynamicLDS,
                                 DynamicVariables, OrderedKernels);

    for (auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
                            &KernelsThatAllocateTableLDS})

    for (Function &Func : M.functions()) {

      const bool AllocateModuleScopeStruct =
          MaybeModuleScopeStruct &&
          KernelsThatAllocateModuleLDS.contains(&Func);

      auto Replacement = KernelToReplacement.find(&Func);
      const bool AllocateKernelScopeStruct =
          Replacement != KernelToReplacement.end();

      const bool AllocateDynamicVariable =
          KernelToCreatedDynamicLDS.contains(&Func);

      if (AllocateModuleScopeStruct) {

      if (AllocateKernelScopeStruct) {

        recordLDSAbsoluteAddress(&M, KernelStruct, Offset);

      if (AllocateDynamicVariable) {
        GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];

        recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);

      if (AllocateDynamicVariable)

      Func.addFnAttr("amdgpu-lds-size", Buffer);
  static bool superAlignLDSGlobals(Module &M) {

    if (!SuperAlignLDSGlobals) {

    for (auto &GV : M.globals()) {

        Alignment = std::max(Alignment, Align(16));
      } else if (GVSize > 4) {
        Alignment = std::max(Alignment, Align(8));
      } else if (GVSize > 2) {
        Alignment = std::max(Alignment, Align(4));
      } else if (GVSize > 1) {
        Alignment = std::max(Alignment, Align(2));
  static LDSVariableReplacement createLDSVariableReplacement(
      Module &M, std::string VarName,

    auto Sorted = sortByName(std::vector<GlobalVariable *>(
        LDSVarsToTransform.begin(), LDSVarsToTransform.end()));

    std::vector<GlobalVariable *> LocalVars;

    LocalVars.reserve(LDSVarsToTransform.size());
    IsPaddingField.reserve(LDSVarsToTransform.size());

    for (auto &F : LayoutFields) {

      Align DataAlign = F.Alignment;

      if (uint64_t Rem = CurrentOffset % DataAlignV) {
        uint64_t Padding = DataAlignV - Rem;

        CurrentOffset += Padding;

      LocalVars.push_back(FGV);

      CurrentOffset += F.Size;

    std::vector<Type *> LocalVarTypes;
    LocalVarTypes.reserve(LocalVars.size());

        LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),

    for (size_t I = 0; I < LocalVars.size(); I++) {

      Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};

      if (IsPaddingField[I]) {

    assert(Map.size() == LDSVarsToTransform.size());
    return {SGV, std::move(Map)};
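  // replaceLDSVariablesWithStruct: replace the uses selected by the predicate
  // with GEPs into the replacement struct, attaching alias.scope/noalias
  // metadata so accesses to distinct fields are still known not to alias.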
  template <typename PredicateTy>
  static void replaceLDSVariablesWithStruct(
      const LDSVariableReplacement &Replacement, PredicateTy Predicate) {

    auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
        LDSVarsToTransformArg.begin(), LDSVarsToTransformArg.end()));

    const size_t NumberVars = LDSVarsToTransform.size();
    if (NumberVars > 1) {

      AliasScopes.reserve(NumberVars);

      for (size_t I = 0; I < NumberVars; I++) {

      NoAliasList.append(&AliasScopes[1], AliasScopes.end());

    for (size_t I = 0; I < NumberVars; I++) {

      Constant *GEP = Replacement.LDSVarsToConstantGEP.at(GV);

      APInt APOff(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
      GEP->stripAndAccumulateInBoundsConstantOffsets(DL, APOff);

        NoAliasList[I - 1] = AliasScopes[I - 1];

      refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
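  // refineUsesAlignmentAndAA: walk users of the pointer up to MaxDepth levels,
  // raising load/store/atomic alignment and propagating the alias scope and
  // noalias metadata through GEPs and address space casts.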
  static void refineUsesAlignmentAndAA(Value *Ptr, Align A,
                                       const DataLayout &DL, MDNode *AliasScope,
                                       MDNode *NoAlias, unsigned MaxDepth = 5) {
    if (!MaxDepth || (A == 1 && !AliasScope))

      if (AliasScope && I->mayReadOrWriteMemory()) {
        MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);

        I->setMetadata(LLVMContext::MD_alias_scope, AS);

        MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);

            if (Intersection.empty()) {

        I->setMetadata(LLVMContext::MD_noalias, NA);

        LI->setAlignment(std::max(A, LI->getAlign()));

        if (SI->getPointerOperand() == Ptr)
          SI->setAlignment(std::max(A, SI->getAlign()));

        if (AI->getPointerOperand() == Ptr)
          AI->setAlignment(std::max(A, AI->getAlign()));

        if (AI->getPointerOperand() == Ptr)
          AI->setAlignment(std::max(A, AI->getAlign()));

        unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());

        if (GEP->getPointerOperand() == Ptr) {

          if (GEP->accumulateConstantOffset(DL, Off))
            refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,

        if (I->getOpcode() == Instruction::BitCast ||
            I->getOpcode() == Instruction::AddrSpaceCast)
          refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
class AMDGPULowerModuleLDSLegacy : public ModulePass {

  bool runOnModule(Module &M) override {

    auto &TPC = getAnalysis<TargetPassConfig>();

char AMDGPULowerModuleLDSLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
                      "Lower uses of LDS variables from non-kernel functions",
                      false, false)
INITIALIZE_PASS_END(AMDGPULowerModuleLDSLegacy, DEBUG_TYPE,
                    "Lower uses of LDS variables from non-kernel functions",
                    false, false)

ModulePass *
llvm::createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM) {
  return new AMDGPULowerModuleLDSLegacy(TM);
}