LLVM  15.0.0git
AMDGPUPromoteKernelArguments.cpp
Go to the documentation of this file.
1 //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass recursively promotes generic pointer arguments of a kernel
10 /// into the global address space.
11 ///
12 /// The pass walks kernel's pointer arguments, then loads from them. If a loaded
13 /// value is a pointer and loaded pointer is unmodified in the kernel before the
14 /// load, then promote loaded pointer to global. Then recursively continue.
15 //
16 //===----------------------------------------------------------------------===//
17 
#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
25 
26 #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27 
28 using namespace llvm;
29 
30 namespace {
31 
32 class AMDGPUPromoteKernelArguments : public FunctionPass {
33  MemorySSA *MSSA;
34 
36 
37  Instruction *ArgCastInsertPt;
38 
40 
41  void enqueueUsers(Value *Ptr);
42 
43  bool promotePointer(Value *Ptr);
44 
45  bool promoteLoad(LoadInst *LI);
46 
47 public:
48  static char ID;
49 
50  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51 
52  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53 
54  bool runOnFunction(Function &F) override;
55 
56  void getAnalysisUsage(AnalysisUsage &AU) const override {
59  AU.setPreservesAll();
60  }
61 };
62 
63 } // end anonymous namespace
64 
65 void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66  SmallVector<User *> PtrUsers(Ptr->users());
67 
68  while (!PtrUsers.empty()) {
69  Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70  if (!U)
71  continue;
72 
73  switch (U->getOpcode()) {
74  default:
75  break;
76  case Instruction::Load: {
77  LoadInst *LD = cast<LoadInst>(U);
78  if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
80  Ptrs.push_back(LD);
81 
82  break;
83  }
84  case Instruction::GetElementPtr:
85  case Instruction::AddrSpaceCast:
86  case Instruction::BitCast:
87  if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88  PtrUsers.append(U->user_begin(), U->user_end());
89  break;
90  }
91  }
92 }
93 
94 bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95  bool Changed = false;
96 
97  LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98  if (LI)
99  Changed |= promoteLoad(LI);
100 
101  PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102  if (!PT)
103  return Changed;
104 
108  enqueueUsers(Ptr);
109 
111  return Changed;
112 
113  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
114  : ArgCastInsertPt);
115 
116  // Cast pointer to global address space and back to flat and let
117  // Infer Address Spaces pass to do all necessary rewriting.
118  PointerType *NewPT =
120  Value *Cast =
121  B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122  Value *CastBack =
123  B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124  Ptr->replaceUsesWithIf(CastBack,
125  [Cast](Use &U) { return U.getUser() != Cast; });
126 
127  return true;
128 }
129 
130 bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131  if (!LI->isSimple())
132  return false;
133 
134  LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
135  return true;
136 }
137 
138 // skip allocas
140  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
141  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
142  AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
143 
144  // If this is a dynamic alloca, the value may depend on the loaded kernargs,
145  // so loads will need to be inserted before it.
146  if (!AI || !AI->isStaticAlloca())
147  break;
148  }
149 
150  return InsPt;
151 }
152 
154  AliasAnalysis &AA) {
155  if (skipFunction(F))
156  return false;
157 
158  CallingConv::ID CC = F.getCallingConv();
159  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
160  return false;
161 
162  ArgCastInsertPt = &*getInsertPt(*F.begin());
163  this->MSSA = &MSSA;
164  this->AA = &AA;
165 
166  for (Argument &Arg : F.args()) {
167  if (Arg.use_empty())
168  continue;
169 
170  PointerType *PT = dyn_cast<PointerType>(Arg.getType());
171  if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
174  continue;
175 
176  Ptrs.push_back(&Arg);
177  }
178 
179  bool Changed = false;
180  while (!Ptrs.empty()) {
181  Value *Ptr = Ptrs.pop_back_val();
182  Changed |= promotePointer(Ptr);
183  }
184 
185  return Changed;
186 }
187 
189  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191  return run(F, MSSA, AA);
192 }
193 
194 INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
195  "AMDGPU Promote Kernel Arguments", false, false)
198 INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
199  "AMDGPU Promote Kernel Arguments", false, false)
200 
201 char AMDGPUPromoteKernelArguments::ID = 0;
202 
204  return new AMDGPUPromoteKernelArguments();
205 }
206 
210  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
212  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
214  PA.preserveSet<CFGAnalyses>();
216  return PA;
217  }
218  return PreservedAnalyses::all();
219 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1303
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::Function
Definition: Function.h:60
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::IRBuilder<>
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
getInsertPt
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
Definition: AMDGPUPromoteKernelArguments.cpp:139
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1300
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1366
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::AllocaInst::isStaticAlloca
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Definition: Instructions.cpp:1419
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:157
AMDGPUMemoryUtils.h
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:998
llvm::AAResults
Definition: AliasAnalysis.h:511
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::Instruction
Definition: Instruction.h:42
llvm::AMDGPU::isClobberedInFunction
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, AAResults *AA)
Check is a Load is clobbered in its function.
Definition: AMDGPUMemoryUtils.cpp:171
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Arguments
AMDGPU Promote Kernel Arguments
Definition: AMDGPUPromoteKernelArguments.cpp:199
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
llvm::Value::user_end
user_iterator user_end()
Definition: Value.h:405
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPromoteKernelArguments.cpp:26
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:714
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::AMDGPUPromoteKernelArgumentsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPUPromoteKernelArguments.cpp:208
IRBuilder.h
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:948
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
AMDGPU.h
llvm::LoadInst::isSimple
bool isSimple() const
Definition: Instructions.h:252
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE, "AMDGPU Promote Kernel Arguments", false, false) INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::Value::stripInBoundsOffsets
const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition: Value.cpp:774
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::createAMDGPUPromoteKernelArgumentsPass
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
Definition: AMDGPUPromoteKernelArguments.cpp:203
AA
MemorySSA.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
SmallVector.h
llvm::PointerType::getWithSamePointeeType
static PointerType * getWithSamePointeeType(PointerType *PT, unsigned AddressSpace)
This constructs a pointer type with the same pointee type as input PointerType (or opaque pointer if ...
Definition: DerivedTypes.h:666
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1351
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:537
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:58
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38