LLVM  13.0.0git
AMDGPULDSUtils.cpp
Go to the documentation of this file.
1 //===- AMDGPULDSUtils.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // AMDGPU LDS related helper utility functions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPULDSUtils.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Constants.h"
18 
19 using namespace llvm;
20 
21 namespace llvm {
22 
23 namespace AMDGPU {
24 
25 bool isKernelCC(const Function *Func) {
26  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
27 }
28 
30  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
31  GV->getValueType());
32 }
33 
34 static void collectFunctionUses(User *U, const Function *F,
35  SetVector<Instruction *> &InstUsers) {
36  SmallVector<User *> Stack{U};
37 
38  while (!Stack.empty()) {
39  U = Stack.pop_back_val();
40 
41  if (auto *I = dyn_cast<Instruction>(U)) {
42  if (I->getFunction() == F)
43  InstUsers.insert(I);
44  continue;
45  }
46 
47  if (!isa<ConstantExpr>(U))
48  continue;
49 
50  append_range(Stack, U->users());
51  }
52 }
53 
55  SetVector<Instruction *> InstUsers;
56 
57  collectFunctionUses(C, F, InstUsers);
58  for (Instruction *I : InstUsers) {
60  }
61 }
62 
64  const GlobalVariable &GV, const Function *F) {
65  // Any LDS variable can be lowered by moving into the created struct
66  // Each variable so lowered is allocated in every kernel, so variables
67  // whose users are all known to be safe to lower without the transform
68  // are left unchanged.
69  bool Ret = false;
72 
73  assert(!F || isKernelCC(F));
74 
75  while (!Stack.empty()) {
76  const User *V = Stack.pop_back_val();
77  Visited.insert(V);
78 
79  if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
80  if (UsedList.contains(G)) {
81  continue;
82  }
83  }
84 
85  if (auto *I = dyn_cast<Instruction>(V)) {
86  const Function *UF = I->getFunction();
87  if (UF == F) {
88  // Used from this kernel, we want to put it into the structure.
89  Ret = true;
90  } else if (!F) {
91  Ret |= !isKernelCC(UF);
92  }
93  continue;
94  }
95 
96  if (auto *E = dyn_cast<ConstantExpr>(V)) {
97  for (const User *U : E->users()) {
98  if (Visited.insert(U).second) {
99  Stack.push_back(U);
100  }
101  }
102  continue;
103  }
104 
105  // Unknown user, conservatively lower the variable.
106  // For module LDS conservatively means place it into the module LDS struct.
107  // For kernel LDS it means lower as a standalone variable.
108  return !F;
109  }
110 
111  return Ret;
112 }
113 
114 std::vector<GlobalVariable *>
116  const Function *F) {
117  std::vector<llvm::GlobalVariable *> LocalVars;
118  for (auto &GV : M.globals()) {
119  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
120  continue;
121  }
122  if (!GV.hasInitializer()) {
123  // addrspace(3) without initializer implies cuda/hip extern __shared__
124  // the semantics for such a variable appears to be that all extern
125  // __shared__ variables alias one another, in which case this transform
126  // is not required
127  continue;
128  }
129  if (!isa<UndefValue>(GV.getInitializer())) {
130  // Initializers are unimplemented for local address space.
131  // Leave such variables in place for consistent error reporting.
132  continue;
133  }
134  if (GV.isConstant()) {
135  // A constant undef variable can't be written to, and any load is
136  // undef, so it should be eliminated by the optimizer. It could be
137  // dropped by the back end if not. This pass skips over it.
138  continue;
139  }
140  if (!shouldLowerLDSToStruct(UsedList, GV, F)) {
141  continue;
142  }
143  LocalVars.push_back(&GV);
144  }
145  return LocalVars;
146 }
147 
150 
152  collectUsedGlobalVariables(M, TmpVec, true);
153  UsedList.insert(TmpVec.begin(), TmpVec.end());
154 
155  TmpVec.clear();
156  collectUsedGlobalVariables(M, TmpVec, false);
157  UsedList.insert(TmpVec.begin(), TmpVec.end());
158 
159  return UsedList;
160 }
161 
162 } // end namespace AMDGPU
163 
164 } // end namespace llvm
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:892
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Function
Definition: Function.h:61
llvm::AMDGPU::shouldLowerLDSToStruct
bool shouldLowerLDSToStruct(const SmallPtrSetImpl< GlobalValue * > &UsedList, const GlobalVariable &GV, const Function *F)
Definition: AMDGPULDSUtils.cpp:63
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:364
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:763
llvm::AMDGPU::getUsedList
SmallPtrSet< GlobalValue *, 32 > getUsedList(Module &M)
Definition: AMDGPULDSUtils.cpp:148
AMDGPULDSUtils.h
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::convertConstantExprsToInstructions
void convertConstantExprsToInstructions(Instruction *I, ConstantExpr *CE, SmallPtrSetImpl< Instruction * > *Insts=nullptr)
The given instruction I contains given constant expression CE as one of its operands,...
Definition: ReplaceConstant.cpp:72
llvm::Instruction
Definition: Instruction.h:45
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AMDGPU::collectFunctionUses
static void collectFunctionUses(User *U, const Function *F, SetVector< Instruction * > &InstUsers)
Definition: AMDGPULDSUtils.cpp:34
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::AMDGPU::replaceConstantUsesInFunction
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F)
Replace all uses of constant C with instructions in F.
Definition: AMDGPULDSUtils.cpp:54
I
#define I(x, y, z)
Definition: MD5.cpp:59
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::AMDGPU::findVariablesToLower
std::vector< GlobalVariable * > findVariablesToLower(Module &M, const SmallPtrSetImpl< GlobalValue * > &UsedList, const Function *F)
Definition: AMDGPULDSUtils.cpp:115
llvm::AMDGPU::isModuleEntryFunctionCC
bool isModuleEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1387
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1672
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:662
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPULDSUtils.cpp:29
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:931
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
ReplaceConstant.h
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPULDSUtils.cpp:25
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
SetVector.h
AMDGPUBaseInfo.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364