LLVM  14.0.0git
AMDGPUPromoteKernelArguments.cpp
Go to the documentation of this file.
1 //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass recursively promotes generic pointer arguments of a kernel
10 /// into the global address space.
11 ///
12 /// The pass walks the kernel's pointer arguments, then the loads from them. If
13 /// a loaded value is a pointer and that pointer is unmodified in the kernel
14 /// before the load, the loaded pointer is promoted to global. This is repeated recursively.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/InitializePasses.h"
23 
24 #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
25 
26 using namespace llvm;
27 
28 namespace {
29 
// Function pass that wraps pointer kernel arguments, and pointers loaded
// through them, in address space casts (see promotePointer).
// NOTE(review): listing lines 35 and 51 are absent from this copy of the file.
// The Ptrs worklist used by the member functions is presumably declared on
// line 35 -- confirm against the original source.
30 class AMDGPUPromoteKernelArguments : public FunctionPass {
  // MemorySSA of the function being processed; assigned in run() and queried
  // in enqueueUsers().
31  MemorySSA *MSSA;
32 
  // Where casts of arguments are inserted; assigned in run() from
  // getInsertPt() applied to the function's first block.
33  Instruction *ArgCastInsertPt;
34 
36 
  // Queues on Ptrs the qualifying loads reachable from Ptr.
37  void enqueueUsers(Value *Ptr);
38 
  // Inserts the casts for Ptr; returns true if the IR was changed.
39  bool promotePointer(Value *Ptr);
40 
41 public:
42  static char ID;
43 
44  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
45 
  // Does the actual work; returns true if any pointer was promoted.
46  bool run(Function &F, MemorySSA &MSSA);
47 
48  bool runOnFunction(Function &F) override;
49 
50  void getAnalysisUsage(AnalysisUsage &AU) const override {
  // NOTE(review): listing line 51 is missing here; since runOnFunction calls
  // getAnalysis<MemorySSAWrapperPass>(), it presumably registers that
  // requirement -- confirm.
52  AU.setPreservesAll();
53  }
54 };
55 
56 } // end anonymous namespace
57 
/// Walk the users of \p Ptr, looking through GEP, addrspacecast and bitcast
/// instructions, and append to Ptrs each pointer-typed load from \p Ptr whose
/// clobbering access is MemorySSA's live-on-entry definition.
// NOTE(review): listing lines 73-75 are absent from this copy, so the middle of
// the load filter condition below is not visible.
58 void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  // Local worklist seeded with the direct users of Ptr.
59  SmallVector<User *> PtrUsers(Ptr->users());
60 
61  while (!PtrUsers.empty()) {
62  Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
  // Users that are not instructions are ignored.
63  if (!U)
64  continue;
65 
66  switch (U->getOpcode()) {
67  default:
68  break;
69  case Instruction::Load: {
70  LoadInst *LD = cast<LoadInst>(U);
71  PointerType *PT = dyn_cast<PointerType>(LD->getType());
  // Only consider loads that produce a pointer and whose address, with casts
  // and inbounds offsets stripped, is Ptr itself.
72  if (!PT ||
76  LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
77  break;
  // Queue the load only if the walker resolves its clobbering access to the
  // live-on-entry definition.
78  const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD);
79  // TODO: This load probably can be promoted to constant address space.
80  if (MSSA->isLiveOnEntryDef(MA))
81  Ptrs.push_back(LD);
82  break;
83  }
84  case Instruction::GetElementPtr:
85  case Instruction::AddrSpaceCast:
86  case Instruction::BitCast:
  // Follow the users of this instruction only if its first operand strips
  // back to Ptr.
87  if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88  PtrUsers.append(U->user_begin(), U->user_end());
89  break;
90  }
91  }
92 }
93 
/// Queue the promotable loads reachable from \p Ptr, then wrap \p Ptr in a pair
/// of address space casts and redirect its other uses to the second cast.
/// \returns true when the casts were inserted, false on the early exit.
// NOTE(review): listing lines 98 and 108 are absent from this copy, so the
// condition guarding the early `return false` and the initializer of NewPT are
// not visible here.
94 bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  // Collect users first, before any use of Ptr is rewritten below.
95  enqueueUsers(Ptr);
96 
97  PointerType *PT = cast<PointerType>(Ptr->getType());
99  return false;
100 
  // Casts of an argument go at ArgCastInsertPt; casts of any other value go
  // immediately after the instruction that defines it.
101  bool IsArg = isa<Argument>(Ptr);
102  IRBuilder<> B(IsArg ? ArgCastInsertPt
103  : &*std::next(cast<Instruction>(Ptr)->getIterator()));
104 
105  // Cast pointer to global address space and back to flat and let the
106  // Infer Address Spaces pass do all necessary rewriting.
107  PointerType *NewPT =
109  Value *Cast =
110  B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
111  Value *CastBack =
112  B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  // Redirect every use of Ptr to CastBack except the one inside Cast, which
  // must keep consuming the original Ptr.
113  Ptr->replaceUsesWithIf(CastBack,
114  [Cast](Use &U) { return U.getUser() != Cast; });
115 
116  return true;
117 }
118 
// NOTE(review): listing line 120, which carries this function's signature, is
// absent from this copy. The symbol index at the end of the page gives it as
// `static BasicBlock::iterator getInsertPt(BasicBlock &BB)`.
// Returns an iterator to the first instruction, starting at BB's first
// insertion point, that is not a static alloca (or BB.end() if there is none).
119 // skip allocas
121  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
122  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
123  AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
124 
125  // If this is a dynamic alloca, the value may depend on the loaded kernargs,
126  // so loads will need to be inserted before it.
127  if (!AI || !AI->isStaticAlloca())
128  break;
129  }
130 
131  return InsPt;
132 }
133 
/// Seed the Ptrs worklist with the used pointer arguments of kernel \p F that
/// pass the address space filter, then promote every queued pointer.
/// \returns true if promotePointer reported a change for at least one pointer.
// NOTE(review): listing lines 151-152 are absent from this copy, so the rest of
// the argument address space filter is not visible here.
134 bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
135  if (skipFunction(F))
136  return false;
137 
  // Only functions with the AMDGPU_KERNEL calling convention and at least one
  // argument are processed.
138  CallingConv::ID CC = F.getCallingConv();
139  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
140  return false;
141 
  // Stash per-function state read by enqueueUsers() and promotePointer().
142  ArgCastInsertPt = &*getInsertPt(*F.begin());
143  this->MSSA = &MSSA;
144 
145  for (Argument &Arg : F.args()) {
  // Arguments without uses are skipped.
146  if (Arg.use_empty())
147  continue;
148 
149  PointerType *PT = dyn_cast<PointerType>(Arg.getType());
150  if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
153  continue;
154 
155  Ptrs.push_back(&Arg);
156  }
157 
  // Drain the worklist; promotePointer() calls enqueueUsers(), which may push
  // further loaded pointers onto Ptrs while this loop runs.
158  bool Changed = false;
159  while (!Ptrs.empty()) {
160  Value *Ptr = Ptrs.pop_back_val();
161  Changed |= promotePointer(Ptr);
162  }
163 
164  return Changed;
165 }
166 
// NOTE(review): listing line 167, which carries this function's signature, is
// absent from this copy; the class declares it as
// `bool runOnFunction(Function &F) override`.
// Fetches MemorySSA from MemorySSAWrapperPass and forwards to run().
168  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
169  return run(F, MSSA);
170 }
171 
// Pass registration under the DEBUG_TYPE name, followed by the definition of
// the pass ID.
// NOTE(review): listing line 174, between the BEGIN and END macros, is absent
// from this copy; it presumably holds an INITIALIZE_PASS_DEPENDENCY entry --
// confirm against the original source.
172 INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
173  "AMDGPU Promote Kernel Arguments", false, false)
175 INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
176  "AMDGPU Promote Kernel Arguments", false, false)
177 
178 char AMDGPUPromoteKernelArguments::ID = 0;
179 
// NOTE(review): listing line 180, which carries this function's signature, is
// absent from this copy. The symbol index at the end of the page gives it as
// `FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass()`.
// Returns a newly allocated instance of the pass.
181  return new AMDGPUPromoteKernelArguments();
182 }
183 
// NOTE(review): listing lines 184-186, 189 and 191 are absent from this copy.
// The symbol index at the end of the page gives the signature (line 185) as
// `PreservedAnalyses AMDGPUPromoteKernelArgumentsPass::run(Function &F,
// FunctionAnalysisManager &AM)`. The declaration of PA and a further preserve
// call presumably sit on the other missing lines -- confirm.
// Obtains MemorySSA from the analysis manager and runs the same
// implementation as the FunctionPass wrapper.
187  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
188  if (AMDGPUPromoteKernelArguments().run(F, MSSA)) {
  // Something changed: report the CFG analyses set as preserved.
190  PA.preserveSet<CFGAnalyses>();
192  return PA;
193  }
  // Nothing changed, so every analysis remains valid.
194  return PreservedAnalyses::all();
195 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
llvm::Function
Definition: Function.h:62
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::IRBuilder<>
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:687
getInsertPt
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
Definition: AMDGPUPromoteKernelArguments.cpp:120
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::AllocaInst::isStaticAlloca
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Definition: Instructions.cpp:1397
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:45
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
Arguments
AMDGPU Promote Kernel Arguments
Definition: AMDGPUPromoteKernelArguments.cpp:176
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Value::user_end
user_iterator user_end()
Definition: Value.h:405
llvm::omp::Kernel
Function * Kernel
Summary of a kernel (=entry point for target offloading).
Definition: OpenMPOpt.h:21
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPromoteKernelArguments.cpp:24
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:705
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::AMDGPUPromoteKernelArgumentsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPUPromoteKernelArguments.cpp:185
IRBuilder.h
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:931
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
AMDGPU.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE, "AMDGPU Promote Kernel Arguments", false, false) INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::MemoryAccess
Definition: MemorySSA.h:137
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::Value::stripInBoundsOffsets
const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition: Value.cpp:777
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::createAMDGPUPromoteKernelArgumentsPass
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
Definition: AMDGPUPromoteKernelArguments.cpp:180
MemorySSA.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
SmallVector.h
llvm::PointerType::getWithSamePointeeType
static PointerType * getWithSamePointeeType(PointerType *PT, unsigned AddressSpace)
This constructs a pointer type with the same pointee type as input PointerType (or opaque pointer is ...
Definition: DerivedTypes.h:666
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38