LLVM  15.0.0git
AMDGPUMemoryUtils.cpp
Go to the documentation of this file.
1 //===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"
21 
22 #define DEBUG_TYPE "amdgpu-memory-utils"
23 
24 using namespace llvm;
25 
26 namespace llvm {
27 
28 namespace AMDGPU {
29 
31  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
32  GV->getValueType());
33 }
34 
35 static void collectFunctionUses(User *U, const Function *F,
36  SetVector<Instruction *> &InstUsers) {
37  SmallVector<User *> Stack{U};
38 
39  while (!Stack.empty()) {
40  U = Stack.pop_back_val();
41 
42  if (auto *I = dyn_cast<Instruction>(U)) {
43  if (I->getFunction() == F)
44  InstUsers.insert(I);
45  continue;
46  }
47 
48  if (!isa<ConstantExpr>(U))
49  continue;
50 
51  append_range(Stack, U->users());
52  }
53 }
54 
56  SetVector<Instruction *> InstUsers;
57 
58  collectFunctionUses(C, F, InstUsers);
59  for (Instruction *I : InstUsers) {
61  }
62 }
63 
64 static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
65  const Function *F) {
66  // We are not interested in kernel LDS lowering for module LDS itself.
67  if (F && GV.getName() == "llvm.amdgcn.module.lds")
68  return false;
69 
70  bool Ret = false;
73 
74  assert(!F || isKernelCC(F));
75 
76  while (!Stack.empty()) {
77  const User *V = Stack.pop_back_val();
78  Visited.insert(V);
79 
80  if (isa<GlobalValue>(V)) {
81  // This use of the LDS variable is the initializer of a global variable.
82  // This is ill formed. The address of an LDS variable is kernel dependent
83  // and unknown until runtime. It can't be written to a global variable.
84  continue;
85  }
86 
87  if (auto *I = dyn_cast<Instruction>(V)) {
88  const Function *UF = I->getFunction();
89  if (UF == F) {
90  // Used from this kernel, we want to put it into the structure.
91  Ret = true;
92  } else if (!F) {
93  // For module LDS lowering, lowering is required if the user instruction
94  // is from non-kernel function.
95  Ret |= !isKernelCC(UF);
96  }
97  continue;
98  }
99 
100  // User V should be a constant, recursively visit users of V.
101  assert(isa<Constant>(V) && "Expected a constant.");
102  append_range(Stack, V->users());
103  }
104 
105  return Ret;
106 }
107 
108 std::vector<GlobalVariable *> findVariablesToLower(Module &M,
109  const Function *F) {
110  std::vector<llvm::GlobalVariable *> LocalVars;
111  for (auto &GV : M.globals()) {
112  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
113  continue;
114  }
115  if (!GV.hasInitializer()) {
116  // addrspace(3) without initializer implies cuda/hip extern __shared__
117  // the semantics for such a variable appears to be that all extern
118  // __shared__ variables alias one another, in which case this transform
119  // is not required
120  continue;
121  }
122  if (!isa<UndefValue>(GV.getInitializer())) {
123  // Initializers are unimplemented for LDS address space.
124  // Leave such variables in place for consistent error reporting.
125  continue;
126  }
127  if (GV.isConstant()) {
128  // A constant undef variable can't be written to, and any load is
129  // undef, so it should be eliminated by the optimizer. It could be
130  // dropped by the back end if not. This pass skips over it.
131  continue;
132  }
133  if (!shouldLowerLDSToStruct(GV, F)) {
134  continue;
135  }
136  LocalVars.push_back(&GV);
137  }
138  return LocalVars;
139 }
140 
142  Instruction *DefInst = Def->getMemoryInst();
143 
144  if (isa<FenceInst>(DefInst))
145  return false;
146 
147  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
148  switch (II->getIntrinsicID()) {
149  case Intrinsic::amdgcn_s_barrier:
150  case Intrinsic::amdgcn_wave_barrier:
151  case Intrinsic::amdgcn_sched_barrier:
152  return false;
153  default:
154  break;
155  }
156  }
157 
158  // Ignore atomics not aliasing with the original load, any atomic is a
159  // universal MemoryDef from MSSA's point of view too, just like a fence.
160  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
161  return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
162  };
163 
164  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
165  checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
166  return false;
167 
168  return true;
169 }
170 
172  AAResults *AA) {
173  MemorySSAWalker *Walker = MSSA->getWalker();
177 
178  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
179 
180  // Start with a nearest dominating clobbering access, it will be either
181  // live on entry (nothing to do, load is not clobbered), MemoryDef, or
182  // MemoryPhi if several MemoryDefs can define this memory state. In that
183  // case add all Defs to WorkList and continue going up and checking all
184  // the definitions of this memory location until the root. When all the
185  // defs are exhausted and came to the entry state we have no clobber.
186  // Along the scan ignore barriers and fences which are considered clobbers
187  // by the MemorySSA, but not really writing anything into the memory.
188  while (!WorkList.empty()) {
189  MemoryAccess *MA = WorkList.pop_back_val();
190  if (!Visited.insert(MA).second)
191  continue;
192 
193  if (MSSA->isLiveOnEntryDef(MA))
194  continue;
195 
196  if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
197  LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');
198 
199  if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
200  LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
201  return true;
202  }
203 
204  WorkList.push_back(
205  Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
206  continue;
207  }
208 
209  const MemoryPhi *Phi = cast<MemoryPhi>(MA);
210  for (auto &Use : Phi->incoming_values())
211  WorkList.push_back(cast<MemoryAccess>(&Use));
212  }
213 
214  LLVM_DEBUG(dbgs() << " -> no clobber\n");
215  return false;
216 }
217 
218 } // end namespace AMDGPU
219 
220 } // end namespace llvm
llvm::AMDGPU::findVariablesToLower
std::vector< GlobalVariable * > findVariablesToLower(Module &M, const Function *F)
Definition: AMDGPUMemoryUtils.cpp:108
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:35
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:915
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
IntrinsicInst.h
llvm::Function
Definition: Function.h:60
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::AMDGPU::shouldLowerLDSToStruct
static bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F)
Definition: AMDGPUMemoryUtils.cpp:64
llvm::MemoryPhi
Represents phi nodes for memory accesses.
Definition: MemorySSA.h:493
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
AMDGPUMemoryUtils.h
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:751
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::convertConstantExprsToInstructions
void convertConstantExprsToInstructions(Instruction *I, ConstantExpr *CE, SmallPtrSetImpl< Instruction * > *Insts=nullptr)
The given instruction I contains given constant expression CE as one of its operands,...
Definition: ReplaceConstant.cpp:22
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:372
llvm::Instruction
Definition: Instruction.h:42
llvm::AMDGPU::isClobberedInFunction
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check if a Load is clobbered in its function.
Definition: AMDGPUMemoryUtils.cpp:171
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AMDGPU::collectFunctionUses
static void collectFunctionUses(User *U, const Function *F, SetVector< Instruction * > &InstUsers)
Definition: AMDGPUMemoryUtils.cpp:35
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1058
llvm::AMDGPU::replaceConstantUsesInFunction
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F)
Replace all uses of constant C with instructions in F.
Definition: AMDGPUMemoryUtils.cpp:55
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:386
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MemoryPhi::incoming_values
op_range incoming_values()
Definition: MemorySSA.h:536
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1604
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
DataLayout.h
AMDGPU.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1823
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPU::isReallyAClobber
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA)
Given a Def clobbering a load from Ptr according to the MSSA, check if this is actually a memory update, or an artificial clobber such as a fence or barrier.
Definition: AMDGPUMemoryUtils.cpp:141
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::MemoryAccess
Definition: MemorySSA.h:142
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::AMDGPU::getAlign
Align getAlign(DataLayout const &DL, const GlobalVariable *GV)
Definition: AMDGPUMemoryUtils.cpp:30
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
AA
MemorySSA.h
Instructions.h
ReplaceConstant.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:278
llvm::MemorySSAWalker
This is the generic walker interface for walkers of MemorySSA.
Definition: MemorySSA.h:1029
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::AMDGPU::isKernelCC
bool isKernelCC(const Function *Func)
Definition: AMDGPUBaseInfo.cpp:1713
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:210
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
AMDGPUBaseInfo.h
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365