LLVM  15.0.0git
AMDGPULowerModuleLDSPass.cpp
Go to the documentation of this file.
1 //===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass eliminates LDS uses from non-kernel functions.
10 //
11 // The strategy is to create a new struct with a field for each LDS variable
12 // and allocate that struct at the same address for every kernel. Uses of the
13 // original LDS variables are then replaced with compile time offsets from that
14 // known address. AMDGPUMachineFunction allocates the LDS global.
15 //
16 // Local variables with constant annotation or non-undef initializer are passed
17 // through unchanged for simplification or error diagnostics in later passes.
18 //
19 // To reduce the memory overhead variables that are only used by kernels are
20 // excluded from this transform. The analysis to determine whether a variable
21 // is only used by a kernel is cheap and conservative so this may allocate
22 // a variable in every kernel when it was not strictly necessary to do so.
23 //
24 // A possible future refinement is to specialise the structure per-kernel, so
25 // that fields can be elided based on more expensive analysis.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/IRBuilder.h"
37 #include "llvm/IR/InlineAsm.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/MDBuilder.h"
40 #include "llvm/InitializePasses.h"
41 #include "llvm/Pass.h"
43 #include "llvm/Support/Debug.h"
46 #include <vector>
47 
48 #define DEBUG_TYPE "amdgpu-lower-module-lds"
49 
50 using namespace llvm;
51 
53  "amdgpu-super-align-lds-globals",
54  cl::desc("Increase alignment of LDS if it is not on align boundary"),
55  cl::init(true), cl::Hidden);
56 
57 namespace {
58 
59 SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
61 
63  collectUsedGlobalVariables(M, TmpVec, true);
64  UsedList.insert(TmpVec.begin(), TmpVec.end());
65 
66  TmpVec.clear();
67  collectUsedGlobalVariables(M, TmpVec, false);
68  UsedList.insert(TmpVec.begin(), TmpVec.end());
69 
70  return UsedList;
71 }
72 
73 class AMDGPULowerModuleLDS : public ModulePass {
74 
75  static void removeFromUsedList(Module &M, StringRef Name,
77  GlobalVariable *GV = M.getNamedGlobal(Name);
78  if (!GV || ToRemove.empty()) {
79  return;
80  }
81 
83  auto *CA = cast<ConstantArray>(GV->getInitializer());
84  for (auto &Op : CA->operands()) {
85  // ModuleUtils::appendToUsed only inserts Constants
86  Constant *C = cast<Constant>(Op);
87  if (!ToRemove.contains(C->stripPointerCasts())) {
88  Init.push_back(C);
89  }
90  }
91 
92  if (Init.size() == CA->getNumOperands()) {
93  return; // none to remove
94  }
95 
96  GV->eraseFromParent();
97 
98  for (Constant *C : ToRemove) {
99  C->removeDeadConstantUsers();
100  }
101 
102  if (!Init.empty()) {
103  ArrayType *ATy =
104  ArrayType::get(Type::getInt8PtrTy(M.getContext()), Init.size());
105  GV =
107  ConstantArray::get(ATy, Init), Name);
108  GV->setSection("llvm.metadata");
109  }
110  }
111 
112  static void
113  removeFromUsedLists(Module &M,
114  const std::vector<GlobalVariable *> &LocalVars) {
115  SmallPtrSet<Constant *, 32> LocalVarsSet;
116  for (GlobalVariable *LocalVar : LocalVars)
117  if (Constant *C = dyn_cast<Constant>(LocalVar->stripPointerCasts()))
118  LocalVarsSet.insert(C);
119  removeFromUsedList(M, "llvm.used", LocalVarsSet);
120  removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet);
121  }
122 
123  static void markUsedByKernel(IRBuilder<> &Builder, Function *Func,
124  GlobalVariable *SGV) {
125  // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
126  // that might call a function which accesses a field within it. This is
127  // presently approximated to 'all kernels' if there are any such functions
128  // in the module. This implicit use is redefined as an explicit use here so
129  // that later passes, specifically PromoteAlloca, account for the required
130  // memory without any knowledge of this transform.
131 
132  // An operand bundle on llvm.donothing works because the call instruction
133  // survives until after the last pass that needs to account for LDS. It is
134  // better than inline asm as the latter survives until the end of codegen. A
135  // totally robust solution would be a function with the same semantics as
136  // llvm.donothing that takes a pointer to the instance and is lowered to a
137  // no-op after LDS is allocated, but that is not presently necessary.
138 
139  LLVMContext &Ctx = Func->getContext();
140 
141  Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI());
142 
144 
145  Function *Decl =
146  Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
147 
148  Value *UseInstance[1] = {Builder.CreateInBoundsGEP(
149  SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))};
150 
151  Builder.CreateCall(FTy, Decl, {},
152  {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)},
153  "");
154  }
155 
156 private:
158 
159 public:
160  static char ID;
161 
162  AMDGPULowerModuleLDS() : ModulePass(ID) {
164  }
165 
166  bool runOnModule(Module &M) override {
167  CallGraph CG = CallGraph(M);
168  UsedList = getUsedList(M);
169  bool Changed = superAlignLDSGlobals(M);
170  Changed |= processUsedLDS(CG, M);
171 
172  for (Function &F : M.functions()) {
173  if (F.isDeclaration())
174  continue;
175 
176  // Only lower compute kernels' LDS.
177  if (!AMDGPU::isKernel(F.getCallingConv()))
178  continue;
179  Changed |= processUsedLDS(CG, M, &F);
180  }
181 
182  UsedList.clear();
183  return Changed;
184  }
185 
186 private:
187  // Increase the alignment of LDS globals if necessary to maximise the chance
188  // that we can use aligned LDS instructions to access them.
189  static bool superAlignLDSGlobals(Module &M) {
190  const DataLayout &DL = M.getDataLayout();
191  bool Changed = false;
192  if (!SuperAlignLDSGlobals) {
193  return Changed;
194  }
195 
196  for (auto &GV : M.globals()) {
198  // Only changing alignment of LDS variables
199  continue;
200  }
201  if (!GV.hasInitializer()) {
202  // cuda/hip extern __shared__ variable, leave alignment alone
203  continue;
204  }
205 
207  TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());
208 
209  if (GVSize > 8) {
210  // We might want to use a b96 or b128 load/store
211  Alignment = std::max(Alignment, Align(16));
212  } else if (GVSize > 4) {
213  // We might want to use a b64 load/store
214  Alignment = std::max(Alignment, Align(8));
215  } else if (GVSize > 2) {
216  // We might want to use a b32 load/store
217  Alignment = std::max(Alignment, Align(4));
218  } else if (GVSize > 1) {
219  // We might want to use a b16 load/store
220  Alignment = std::max(Alignment, Align(2));
221  }
222 
223  if (Alignment != AMDGPU::getAlign(DL, &GV)) {
224  Changed = true;
225  GV.setAlignment(Alignment);
226  }
227  }
228  return Changed;
229  }
230 
231  bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) {
232  LLVMContext &Ctx = M.getContext();
233  const DataLayout &DL = M.getDataLayout();
234 
235  // Find variables to move into new struct instance
236  std::vector<GlobalVariable *> FoundLocalVars =
238 
239  if (FoundLocalVars.empty()) {
240  // No variables to rewrite, no changes made.
241  return false;
242  }
243 
245  LayoutFields.reserve(FoundLocalVars.size());
246  for (GlobalVariable *GV : FoundLocalVars) {
247  OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
248  AMDGPU::getAlign(DL, GV));
249  LayoutFields.emplace_back(F);
250  }
251 
252  performOptimizedStructLayout(LayoutFields);
253 
254  std::vector<GlobalVariable *> LocalVars;
255  LocalVars.reserve(FoundLocalVars.size()); // will be at least this large
256  {
257  // This usually won't need to insert any padding, perhaps avoid the alloc
258  uint64_t CurrentOffset = 0;
259  for (size_t I = 0; I < LayoutFields.size(); I++) {
260  GlobalVariable *FGV = static_cast<GlobalVariable *>(
261  const_cast<void *>(LayoutFields[I].Id));
262  Align DataAlign = LayoutFields[I].Alignment;
263 
264  uint64_t DataAlignV = DataAlign.value();
265  if (uint64_t Rem = CurrentOffset % DataAlignV) {
266  uint64_t Padding = DataAlignV - Rem;
267 
268  // Append an array of padding bytes to meet alignment requested
269  // Note (o + (a - (o % a)) ) % a == 0
270  // (offset + Padding ) % align == 0
271 
272  Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
273  LocalVars.push_back(new GlobalVariable(
274  M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
276  false));
277  CurrentOffset += Padding;
278  }
279 
280  LocalVars.push_back(FGV);
281  CurrentOffset += LayoutFields[I].Size;
282  }
283  }
284 
285  std::vector<Type *> LocalVarTypes;
286  LocalVarTypes.reserve(LocalVars.size());
288  LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
289  [](const GlobalVariable *V) -> Type * { return V->getValueType(); });
290 
291  std::string VarName(
292  F ? (Twine("llvm.amdgcn.kernel.") + F->getName() + ".lds").str()
293  : "llvm.amdgcn.module.lds");
294  StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");
295 
296  Align StructAlign =
297  AMDGPU::getAlign(DL, LocalVars[0]);
298 
299  GlobalVariable *SGV = new GlobalVariable(
300  M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy),
302  false);
303  SGV->setAlignment(StructAlign);
304  if (!F) {
306  M, {static_cast<GlobalValue *>(
308  cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))});
309  }
310 
311  // The verifier rejects used lists containing an inttoptr of a constant
312  // so remove the variables from these lists before replaceAllUsesWith
313  removeFromUsedLists(M, LocalVars);
314 
315  // Create alias.scope and their lists. Each field in the new structure
316  // does not alias with all other fields.
317  SmallVector<MDNode *> AliasScopes;
318  SmallVector<Metadata *> NoAliasList;
319  if (LocalVars.size() > 1) {
320  MDBuilder MDB(Ctx);
321  AliasScopes.reserve(LocalVars.size());
322  MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
323  for (size_t I = 0; I < LocalVars.size(); I++) {
324  MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
325  AliasScopes.push_back(Scope);
326  }
327  NoAliasList.append(&AliasScopes[1], AliasScopes.end());
328  }
329 
330  // Replace uses of ith variable with a constantexpr to the ith field of the
331  // instance that will be allocated by AMDGPUMachineFunction
332  Type *I32 = Type::getInt32Ty(Ctx);
333  for (size_t I = 0; I < LocalVars.size(); I++) {
334  GlobalVariable *GV = LocalVars[I];
335  Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
336  Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx);
337  if (F) {
338  // Replace all constant uses with instructions if they belong to the
339  // current kernel.
340  for (User *U : make_early_inc_range(GV->users())) {
341  if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
343  }
344 
346 
347  GV->replaceUsesWithIf(GEP, [F](Use &U) {
348  Instruction *I = dyn_cast<Instruction>(U.getUser());
349  return I && I->getFunction() == F;
350  });
351  } else {
352  GV->replaceAllUsesWith(GEP);
353  }
354  if (GV->use_empty()) {
355  UsedList.erase(GV);
356  GV->eraseFromParent();
357  }
358 
359  uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
360  Align A = commonAlignment(StructAlign, Off);
361 
362  if (I)
363  NoAliasList[I - 1] = AliasScopes[I - 1];
364  MDNode *NoAlias =
365  NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
366  MDNode *AliasScope =
367  AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
368 
369  refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
370  }
371 
372  // This ensures the variable is allocated when called functions access it.
373  // It also lets other passes, specifically PromoteAlloca, accurately
374  // calculate how much LDS will be used by the kernel after lowering.
375  if (!F) {
376  IRBuilder<> Builder(Ctx);
377  for (Function &Func : M.functions()) {
378  if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
379  const CallGraphNode *N = CG[&Func];
380  const bool CalleesRequireModuleLDS = N->size() > 0;
381 
382  if (CalleesRequireModuleLDS) {
383  // If a function this kernel might call requires module LDS,
384  // annotate the kernel to let later passes know it will allocate
385  // this structure, even if not apparent from the IR.
386  markUsedByKernel(Builder, &Func, SGV);
387  } else {
388  // However if we are certain this kernel cannot call a function that
389  // requires module LDS, annotate the kernel so the backend can elide
390  // the allocation without repeating callgraph walks.
391  Func.addFnAttr("amdgpu-elide-module-lds");
392  }
393  }
394  }
395  }
396  return true;
397  }
398 
399  void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
400  MDNode *AliasScope, MDNode *NoAlias,
401  unsigned MaxDepth = 5) {
402  if (!MaxDepth || (A == 1 && !AliasScope))
403  return;
404 
405  for (User *U : Ptr->users()) {
406  if (auto *I = dyn_cast<Instruction>(U)) {
407  if (AliasScope && I->mayReadOrWriteMemory()) {
408  MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
409  AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
410  : AliasScope);
411  I->setMetadata(LLVMContext::MD_alias_scope, AS);
412 
413  MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
414  NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
415  I->setMetadata(LLVMContext::MD_noalias, NA);
416  }
417  }
418 
419  if (auto *LI = dyn_cast<LoadInst>(U)) {
420  LI->setAlignment(std::max(A, LI->getAlign()));
421  continue;
422  }
423  if (auto *SI = dyn_cast<StoreInst>(U)) {
424  if (SI->getPointerOperand() == Ptr)
425  SI->setAlignment(std::max(A, SI->getAlign()));
426  continue;
427  }
428  if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
429  // None of atomicrmw operations can work on pointers, but let's
430  // check it anyway in case it will or we will process ConstantExpr.
431  if (AI->getPointerOperand() == Ptr)
432  AI->setAlignment(std::max(A, AI->getAlign()));
433  continue;
434  }
435  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
436  if (AI->getPointerOperand() == Ptr)
437  AI->setAlignment(std::max(A, AI->getAlign()));
438  continue;
439  }
440  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
441  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
442  APInt Off(BitWidth, 0);
443  if (GEP->getPointerOperand() == Ptr) {
444  Align GA;
445  if (GEP->accumulateConstantOffset(DL, Off))
446  GA = commonAlignment(A, Off.getLimitedValue());
447  refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
448  MaxDepth - 1);
449  }
450  continue;
451  }
452  if (auto *I = dyn_cast<Instruction>(U)) {
453  if (I->getOpcode() == Instruction::BitCast ||
454  I->getOpcode() == Instruction::AddrSpaceCast)
455  refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
456  }
457  }
458  }
459 };
460 
461 } // namespace
462 char AMDGPULowerModuleLDS::ID = 0;
463 
465 
466 INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,
467  "Lower uses of LDS variables from non-kernel functions", false,
468  false)
469 
471  return new AMDGPULowerModuleLDS();
472 }
473 
476  return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
478 }
llvm::AMDGPU::findVariablesToLower
std::vector< GlobalVariable * > findVariablesToLower(Module &M, const Function *F)
Definition: AMDGPUMemoryUtils.cpp:108
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::GlobalVariable::eraseFromParent
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:451
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1418
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
llvm::Function
Definition: Function.h:60
Pass.h
llvm::lltok::LocalVar
@ LocalVar
Definition: LLToken.h:418
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:184
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
InlineAsm.h
llvm::AMDGPULowerModuleLDSID
char & AMDGPULowerModuleLDSID
Definition: AMDGPULowerModuleLDSPass.cpp:464
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:542
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:72
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::OperandBundleDefT
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: AutoUpgrade.h:32
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
STLExtras.h
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:513
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1300
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:786
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:786
CommandLine.h
AMDGPUMemoryUtils.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::GlobalObject::setSection
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:248
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
Domain
Domain
Definition: CorrelatedValuePropagation.cpp:710
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerModuleLDSPass.cpp:48
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::Instruction
Definition: Instruction.h:42
MDBuilder.h
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:109
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1777
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:166
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE, "Lower uses of LDS variables from non-kernel functions", false, false) ModulePass *llvm
Definition: AMDGPULowerModuleLDSPass.cpp:466
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::SPIRV::Decoration::Alignment
@ Alignment
llvm::cl::opt< bool >
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:958
uint64_t
llvm::AMDGPU::replaceConstantUsesInFunction
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F)
Replace all uses of constant C with instructions in F.
Definition: AMDGPUMemoryUtils.cpp:55
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:618
OptimizedStructLayout.h
IRBuilder.h
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::GlobalValue::AppendingLinkage
@ AppendingLinkage
Special purpose, only applies to global arrays.
Definition: GlobalValue.h:54
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:751
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:971
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
SuperAlignLDSGlobals
static cl::opt< bool > SuperAlignLDSGlobals("amdgpu-super-align-lds-globals", cl::desc("Increase alignment of LDS if it is not on align boundary"), cl::init(true), cl::Hidden)
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Init
Definition: Record.h:281
llvm::performOptimizedStructLayout
std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
Definition: OptimizedStructLayout.cpp:42
I32
@ I32
Definition: DXILOpLowering.cpp:40
MaxDepth
static const unsigned MaxDepth
Definition: InstCombineMulDivRem.cpp:918
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPUMemoryUtils.cpp:30
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:213
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::TypeSize
Definition: TypeSize.h:435
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1290
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1247
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::pdb::PDB_ColorItem::Padding
@ Padding
llvm::MDBuilder
Definition: MDBuilder.h:35
CallGraph.h
llvm::AMDGPULowerModuleLDSPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPULowerModuleLDSPass.cpp:474
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
Instructions.h
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2093
llvm::SPIRV::FunctionParameterAttribute::NoAlias
@ NoAlias
ModuleUtils.h
N
#define N
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:276
DerivedTypes.h
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:278
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:537
llvm::OptimizedStructLayoutField
A field in a structure.
Definition: OptimizedStructLayout.h:45
llvm::cl::desc
Definition: CommandLine.h:405
llvm::GlobalObject::setAlignment
void setAlignment(MaybeAlign Align)
Definition: Globals.cpp:126
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:644
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPUBaseInfo.cpp:1665
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
AMDGPUBaseInfo.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38