LLVM  14.0.0git
AMDGPULowerModuleLDSPass.cpp
Go to the documentation of this file.
1 //===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass eliminates LDS uses from non-kernel functions.
10 //
11 // The strategy is to create a new struct with a field for each LDS variable
12 // and allocate that struct at the same address for every kernel. Uses of the
13 // original LDS variables are then replaced with compile time offsets from that
14 // known address. AMDGPUMachineFunction allocates the LDS global.
15 //
16 // Local variables with constant annotation or non-undef initializer are passed
17 // through unchanged for simplication or error diagnostics in later passes.
18 //
19 // To reduce the memory overhead variables that are only used by kernels are
20 // excluded from this transform. The analysis to determine whether a variable
21 // is only used by a kernel is cheap and conservative so this may allocate
22 // a variable in every kernel when it was not strictly necessary to do so.
23 //
24 // A possible future refinement is to specialise the structure per-kernel, so
25 // that fields can be elided based on more expensive analysis.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "Utils/AMDGPUBaseInfo.h"
31 #include "Utils/AMDGPULDSUtils.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/InlineAsm.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/MDBuilder.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
42 #include "llvm/Support/Debug.h"
45 #include <vector>
46 
47 #define DEBUG_TYPE "amdgpu-lower-module-lds"
48 
49 using namespace llvm;
50 
52  "amdgpu-super-align-lds-globals",
53  cl::desc("Increase alignment of LDS if it is not on align boundary"),
54  cl::init(true), cl::Hidden);
55 
56 namespace {
57 
58 SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
60 
62  collectUsedGlobalVariables(M, TmpVec, true);
63  UsedList.insert(TmpVec.begin(), TmpVec.end());
64 
65  TmpVec.clear();
66  collectUsedGlobalVariables(M, TmpVec, false);
67  UsedList.insert(TmpVec.begin(), TmpVec.end());
68 
69  return UsedList;
70 }
71 
72 class AMDGPULowerModuleLDS : public ModulePass {
73 
74  static void removeFromUsedList(Module &M, StringRef Name,
76  GlobalVariable *GV = M.getNamedGlobal(Name);
77  if (!GV || ToRemove.empty()) {
78  return;
79  }
80 
82  auto *CA = cast<ConstantArray>(GV->getInitializer());
83  for (auto &Op : CA->operands()) {
84  // ModuleUtils::appendToUsed only inserts Constants
85  Constant *C = cast<Constant>(Op);
86  if (!ToRemove.contains(C->stripPointerCasts())) {
87  Init.push_back(C);
88  }
89  }
90 
91  if (Init.size() == CA->getNumOperands()) {
92  return; // none to remove
93  }
94 
95  GV->eraseFromParent();
96 
97  for (Constant *C : ToRemove) {
98  C->removeDeadConstantUsers();
99  }
100 
101  if (!Init.empty()) {
102  ArrayType *ATy =
103  ArrayType::get(Type::getInt8PtrTy(M.getContext()), Init.size());
104  GV =
106  ConstantArray::get(ATy, Init), Name);
107  GV->setSection("llvm.metadata");
108  }
109  }
110 
111  static void
112  removeFromUsedLists(Module &M,
113  const std::vector<GlobalVariable *> &LocalVars) {
114  SmallPtrSet<Constant *, 32> LocalVarsSet;
115  for (GlobalVariable *LocalVar : LocalVars)
116  if (Constant *C = dyn_cast<Constant>(LocalVar->stripPointerCasts()))
117  LocalVarsSet.insert(C);
118  removeFromUsedList(M, "llvm.used", LocalVarsSet);
119  removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet);
120  }
121 
122  static void markUsedByKernel(IRBuilder<> &Builder, Function *Func,
123  GlobalVariable *SGV) {
124  // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
125  // that might call a function which accesses a field within it. This is
126  // presently approximated to 'all kernels' if there are any such functions
127  // in the module. This implicit use is redefined as an explicit use here so
128  // that later passes, specifically PromoteAlloca, account for the required
129  // memory without any knowledge of this transform.
130 
131  // An operand bundle on llvm.donothing works because the call instruction
132  // survives until after the last pass that needs to account for LDS. It is
133  // better than inline asm as the latter survives until the end of codegen. A
134  // totally robust solution would be a function with the same semantics as
135  // llvm.donothing that takes a pointer to the instance and is lowered to a
136  // no-op after LDS is allocated, but that is not presently necessary.
137 
138  LLVMContext &Ctx = Func->getContext();
139 
140  Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI());
141 
143 
144  Function *Decl =
145  Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
146 
147  Value *UseInstance[1] = {Builder.CreateInBoundsGEP(
148  SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))};
149 
150  Builder.CreateCall(FTy, Decl, {},
151  {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)},
152  "");
153  }
154 
155 private:
157 
158 public:
159  static char ID;
160 
161  AMDGPULowerModuleLDS() : ModulePass(ID) {
163  }
164 
165  bool runOnModule(Module &M) override {
166  UsedList = getUsedList(M);
167  bool Changed = superAlignLDSGlobals(M);
168  Changed |= processUsedLDS(M);
169 
170  for (Function &F : M.functions()) {
171  if (F.isDeclaration())
172  continue;
173 
174  // Only lower compute kernels' LDS.
175  if (!AMDGPU::isKernel(F.getCallingConv()))
176  continue;
177  Changed |= processUsedLDS(M, &F);
178  }
179 
180  UsedList.clear();
181  return Changed;
182  }
183 
184 private:
185  // Increase the alignment of LDS globals if necessary to maximise the chance
186  // that we can use aligned LDS instructions to access them.
187  static bool superAlignLDSGlobals(Module &M) {
188  const DataLayout &DL = M.getDataLayout();
189  bool Changed = false;
190  if (!SuperAlignLDSGlobals) {
191  return Changed;
192  }
193 
194  for (auto &GV : M.globals()) {
196  // Only changing alignment of LDS variables
197  continue;
198  }
199  if (!GV.hasInitializer()) {
200  // cuda/hip extern __shared__ variable, leave alignment alone
201  continue;
202  }
203 
204  Align Alignment = AMDGPU::getAlign(DL, &GV);
205  TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());
206 
207  if (GVSize > 8) {
208  // We might want to use a b96 or b128 load/store
209  Alignment = std::max(Alignment, Align(16));
210  } else if (GVSize > 4) {
211  // We might want to use a b64 load/store
212  Alignment = std::max(Alignment, Align(8));
213  } else if (GVSize > 2) {
214  // We might want to use a b32 load/store
215  Alignment = std::max(Alignment, Align(4));
216  } else if (GVSize > 1) {
217  // We might want to use a b16 load/store
218  Alignment = std::max(Alignment, Align(2));
219  }
220 
221  if (Alignment != AMDGPU::getAlign(DL, &GV)) {
222  Changed = true;
223  GV.setAlignment(Alignment);
224  }
225  }
226  return Changed;
227  }
228 
229  bool processUsedLDS(Module &M, Function *F = nullptr) {
230  LLVMContext &Ctx = M.getContext();
231  const DataLayout &DL = M.getDataLayout();
232 
233  // Find variables to move into new struct instance
234  std::vector<GlobalVariable *> FoundLocalVars =
236 
237  if (FoundLocalVars.empty()) {
238  // No variables to rewrite, no changes made.
239  return false;
240  }
241 
243  LayoutFields.reserve(FoundLocalVars.size());
244  for (GlobalVariable *GV : FoundLocalVars) {
245  OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
246  AMDGPU::getAlign(DL, GV));
247  LayoutFields.emplace_back(F);
248  }
249 
250  performOptimizedStructLayout(LayoutFields);
251 
252  std::vector<GlobalVariable *> LocalVars;
253  LocalVars.reserve(FoundLocalVars.size()); // will be at least this large
254  {
255  // This usually won't need to insert any padding, perhaps avoid the alloc
256  uint64_t CurrentOffset = 0;
257  for (size_t I = 0; I < LayoutFields.size(); I++) {
258  GlobalVariable *FGV = static_cast<GlobalVariable *>(
259  const_cast<void *>(LayoutFields[I].Id));
260  Align DataAlign = LayoutFields[I].Alignment;
261 
262  uint64_t DataAlignV = DataAlign.value();
263  if (uint64_t Rem = CurrentOffset % DataAlignV) {
264  uint64_t Padding = DataAlignV - Rem;
265 
266  // Append an array of padding bytes to meet alignment requested
267  // Note (o + (a - (o % a)) ) % a == 0
268  // (offset + Padding ) % align == 0
269 
270  Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
271  LocalVars.push_back(new GlobalVariable(
272  M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
274  false));
275  CurrentOffset += Padding;
276  }
277 
278  LocalVars.push_back(FGV);
279  CurrentOffset += LayoutFields[I].Size;
280  }
281  }
282 
283  std::vector<Type *> LocalVarTypes;
284  LocalVarTypes.reserve(LocalVars.size());
286  LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
287  [](const GlobalVariable *V) -> Type * { return V->getValueType(); });
288 
289  std::string VarName(
290  F ? (Twine("llvm.amdgcn.kernel.") + F->getName() + ".lds").str()
291  : "llvm.amdgcn.module.lds");
292  StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");
293 
294  Align StructAlign =
295  AMDGPU::getAlign(DL, LocalVars[0]);
296 
297  GlobalVariable *SGV = new GlobalVariable(
298  M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy),
300  false);
301  SGV->setAlignment(StructAlign);
302  if (!F) {
304  M, {static_cast<GlobalValue *>(
306  cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))});
307  }
308 
309  // The verifier rejects used lists containing an inttoptr of a constant
310  // so remove the variables from these lists before replaceAllUsesWith
311  removeFromUsedLists(M, LocalVars);
312 
313  // Create alias.scope and their lists. Each field in the new structure
314  // does not alias with all other fields.
315  SmallVector<MDNode *> AliasScopes;
316  SmallVector<Metadata *> NoAliasList;
317  if (LocalVars.size() > 1) {
318  MDBuilder MDB(Ctx);
319  AliasScopes.reserve(LocalVars.size());
320  MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
321  for (size_t I = 0; I < LocalVars.size(); I++) {
322  MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
323  AliasScopes.push_back(Scope);
324  }
325  NoAliasList.append(&AliasScopes[1], AliasScopes.end());
326  }
327 
328  // Replace uses of ith variable with a constantexpr to the ith field of the
329  // instance that will be allocated by AMDGPUMachineFunction
330  Type *I32 = Type::getInt32Ty(Ctx);
331  for (size_t I = 0; I < LocalVars.size(); I++) {
332  GlobalVariable *GV = LocalVars[I];
333  Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
334  Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx);
335  if (F) {
336  // Replace all constant uses with instructions if they belong to the
337  // current kernel.
338  for (User *U : make_early_inc_range(GV->users())) {
339  if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
341  }
342 
344 
345  GV->replaceUsesWithIf(GEP, [F](Use &U) {
346  Instruction *I = dyn_cast<Instruction>(U.getUser());
347  return I && I->getFunction() == F;
348  });
349  } else {
350  GV->replaceAllUsesWith(GEP);
351  }
352  if (GV->use_empty()) {
353  UsedList.erase(GV);
354  GV->eraseFromParent();
355  }
356 
357  uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
358  Align A = commonAlignment(StructAlign, Off);
359 
360  if (I)
361  NoAliasList[I - 1] = AliasScopes[I - 1];
362  MDNode *NoAlias =
363  NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
364  MDNode *AliasScope =
365  AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
366 
367  refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
368  }
369 
370  // This ensures the variable is allocated when called functions access it.
371  // It also lets other passes, specifically PromoteAlloca, accurately
372  // calculate how much LDS will be used by the kernel after lowering.
373  if (!F) {
374  IRBuilder<> Builder(Ctx);
375  for (Function &Func : M.functions()) {
376  if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
377  markUsedByKernel(Builder, &Func, SGV);
378  }
379  }
380  }
381  return true;
382  }
383 
384  void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
385  MDNode *AliasScope, MDNode *NoAlias,
386  unsigned MaxDepth = 5) {
387  if (!MaxDepth || (A == 1 && !AliasScope))
388  return;
389 
390  for (User *U : Ptr->users()) {
391  if (auto *I = dyn_cast<Instruction>(U)) {
392  if (AliasScope && I->mayReadOrWriteMemory()) {
393  MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
394  AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
395  : AliasScope);
396  I->setMetadata(LLVMContext::MD_alias_scope, AS);
397 
398  MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
399  NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
400  I->setMetadata(LLVMContext::MD_noalias, NA);
401  }
402  }
403 
404  if (auto *LI = dyn_cast<LoadInst>(U)) {
405  LI->setAlignment(std::max(A, LI->getAlign()));
406  continue;
407  }
408  if (auto *SI = dyn_cast<StoreInst>(U)) {
409  if (SI->getPointerOperand() == Ptr)
410  SI->setAlignment(std::max(A, SI->getAlign()));
411  continue;
412  }
413  if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
414  // None of atomicrmw operations can work on pointers, but let's
415  // check it anyway in case it will or we will process ConstantExpr.
416  if (AI->getPointerOperand() == Ptr)
417  AI->setAlignment(std::max(A, AI->getAlign()));
418  continue;
419  }
420  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
421  if (AI->getPointerOperand() == Ptr)
422  AI->setAlignment(std::max(A, AI->getAlign()));
423  continue;
424  }
425  if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
426  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
427  APInt Off(BitWidth, 0);
428  if (GEP->getPointerOperand() == Ptr) {
429  Align GA;
430  if (GEP->accumulateConstantOffset(DL, Off))
431  GA = commonAlignment(A, Off.getLimitedValue());
432  refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
433  MaxDepth - 1);
434  }
435  continue;
436  }
437  if (auto *I = dyn_cast<Instruction>(U)) {
438  if (I->getOpcode() == Instruction::BitCast ||
439  I->getOpcode() == Instruction::AddrSpaceCast)
440  refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
441  }
442  }
443  }
444 };
445 
446 } // namespace
447 char AMDGPULowerModuleLDS::ID = 0;
448 
450 
451 INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,
452  "Lower uses of LDS variables from non-kernel functions", false,
453  false)
454 
456  return new AMDGPULowerModuleLDS();
457 }
458 
461  return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
463 }
llvm::AMDGPU::findVariablesToLower
std::vector< GlobalVariable * > findVariablesToLower(Module &M, const Function *F)
Definition: AMDGPULDSUtils.cpp:109
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::GlobalVariable::eraseFromParent
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:430
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
llvm::wasm::ValType::I32
@ I32
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1400
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function
Definition: Function.h:62
Pass.h
llvm::lltok::LocalVar
@ LocalVar
Definition: LLToken.h:478
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1176
InlineAsm.h
llvm::AMDGPULowerModuleLDSID
char & AMDGPULowerModuleLDSID
Definition: AMDGPULowerModuleLDSPass.cpp:449
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:736
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:540
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:363
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::OperandBundleDefT
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1129
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
STLExtras.h
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:515
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:782
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:740
CommandLine.h
AMDGPULDSUtils.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:656
llvm::GlobalObject::setSection
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:227
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
Domain
Domain
Definition: CorrelatedValuePropagation.cpp:704
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerModuleLDSPass.cpp:47
llvm::Instruction
Definition: Instruction.h:45
MDBuilder.h
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:110
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1775
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE, "Lower uses of LDS variables from non-kernel functions", false, false) ModulePass *llvm
Definition: AMDGPULowerModuleLDSPass.cpp:451
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::cl::opt< bool >
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:928
uint64_t
llvm::AMDGPU::replaceConstantUsesInFunction
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F)
Replace all uses of constant C with instructions in F.
Definition: AMDGPULDSUtils.cpp:56
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:586
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
OptimizedStructLayout.h
IRBuilder.h
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:640
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::GlobalValue::AppendingLinkage
@ AppendingLinkage
Special purpose, only applies to global arrays.
Definition: GlobalValue.h:54
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:748
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:941
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
SuperAlignLDSGlobals
static cl::opt< bool > SuperAlignLDSGlobals("amdgpu-super-align-lds-globals", cl::desc("Increase alignment of LDS if it is not on align boundary"), cl::init(true), cl::Hidden)
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Init
Definition: Record.h:274
llvm::performOptimizedStructLayout
std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
Definition: OptimizedStructLayout.cpp:42
MaxDepth
static const unsigned MaxDepth
Definition: InstCombineMulDivRem.cpp:901
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPULDSUtils.cpp:31
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:971
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::TypeSize
Definition: TypeSize.h:416
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1288
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1238
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:580
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::AMDGPULowerModuleLDSPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPULowerModuleLDSPass.cpp:459
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
Instructions.h
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2080
ModuleUtils.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
DerivedTypes.h
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::OptimizedStructLayoutField
A field in a structure.
Definition: OptimizedStructLayout.h:45
llvm::cl::desc
Definition: CommandLine.h:412
llvm::GlobalObject::setAlignment
void setAlignment(MaybeAlign Align)
Definition: Globals.cpp:124
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:633
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPULDSUtils.cpp:27
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:916
AMDGPUBaseInfo.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38