//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass adds amdgpu.uniform metadata to IR values so this information
/// can be used during instruction selection.
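///
/// For example, a load through a uniform pointer in a kernel may end up
/// annotated as:
///   %v = load i32, i32 addrspace(1)* %ptr,
///        !amdgpu.uniform !0, !amdgpu.noclobber !0
/// where !0 is the empty metadata node !{}.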
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-annotate-uniform"

using namespace llvm;

namespace {

class AMDGPUAnnotateUniformValues : public FunctionPass,
                       public InstVisitor<AMDGPUAnnotateUniformValues> {
  LegacyDivergenceAnalysis *DA;
  MemoryDependenceResults *MDR;
  LoopInfo *LI;
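  // Maps a uniform pointer that is not itself an instruction (kernel argument
  // or global) to the GEP created to stand in for it; see visitLoadInst.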
  DenseMap<Value*, GetElementPtrInst*> noClobberClones;
  bool isKernelFunc;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() :
    FunctionPass(ID) { }
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
  bool isClobberedInFunction(LoadInst * Load);
};

} // End anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

char AMDGPUAnnotateUniformValues::ID = 0;

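// Helpers that attach the empty marker metadata nodes; consumers only need to
// test for the presence of these entries, not for any payload.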
static void setUniformMetadata(Instruction *I) {
  I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}
static void setNoClobberMetadata(Instruction *I) {
  I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}

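// Depth-first walk of the reversed CFG: collect all transitive predecessors
// of Root into Set (Root itself is inserted by the caller).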
static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
  for (auto I : predecessors(Root))
    if (Set.insert(I))
      DFS(I, Set);
}

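// Return true if the memory location read by Load may be clobbered (written,
// or unknown to MemoryDependenceAnalysis) in any block that can reach the
// load, including every block of its outermost enclosing loop.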
bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
  // 1. Get the Loop containing Load->getParent().
  // 2. If it exists, collect all the blocks of the outermost enclosing loop
  //    and check them for writes; if there is no loop, start the DFS over all
  //    predecessors of the load's block.
  // 3. Otherwise start the DFS over all predecessors from the outermost loop
  //    header.
  SetVector<BasicBlock *> Checklist;
  BasicBlock *Start = Load->getParent();
  Checklist.insert(Start);
  const Value *Ptr = Load->getPointerOperand();
  const Loop *L = LI->getLoopFor(Start);
  if (L) {
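    // Climb from the innermost loop containing the load to the outermost one.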
    const Loop *P = L;
    do {
      L = P;
      P = P->getParentLoop();
    } while (P);
    Checklist.insert(L->block_begin(), L->block_end());
    Start = L->getHeader();
  }

  DFS(Start, Checklist);
  for (auto &BB : Checklist) {
    BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
      BasicBlock::iterator(Load) : BB->end();
    auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
                                           StartIt, BB, Load);
    if (Q.isClobber() || Q.isUnknown())
      return true;
  }
  return false;
}

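// Mark the terminator of blocks that branch on a uniform condition so that
// instruction selection can treat the branch as uniform.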
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
  if (I.isUnconditional())
    return;

  Value *Cond = I.getCondition();
  if (!DA->isUniform(Cond))
    return;

  setUniformMetadata(I.getParent()->getTerminator());
}

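// Annotate loads through uniform pointers. In kernel functions, additionally
// record when the loaded memory is provably not clobbered within the
// function, which the backend can use, e.g. to select scalar loads.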
void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  if (!DA->isUniform(Ptr))
    return;
  auto isGlobalLoad = [&](LoadInst &Load)->bool {
    return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
  };
  // We only track memory up to the function boundaries; a FunctionPass cannot
  // look any further. Thus we can only guarantee that memory is not clobbered
  // for memory operations that live in kernel functions.
  bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
  Instruction *PtrI = dyn_cast<Instruction>(Ptr);
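  // If the uniform pointer is not an instruction (a kernel argument or a
  // global), create a trivial GEP of it so there is an instruction to attach
  // the metadata to, and rewrite this load to use the GEP.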
  if (!PtrI && NotClobbered && isGlobalLoad(I)) {
    if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
      // Look up an existing GEP clone of this pointer.
      if (noClobberClones.count(Ptr)) {
        PtrI = noClobberClones[Ptr];
      } else {
        // Create a zero-index GEP of the value.
        Function *F = I.getParent()->getParent();
        Value *Idx = Constant::getIntegerValue(
          Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
        // Insert the GEP at the function entry so it dominates all uses.
        PtrI = GetElementPtrInst::Create(
          Ptr->getType()->getPointerElementType(), Ptr,
          ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
      }
      I.replaceUsesOfWith(Ptr, PtrI);
    }
  }

  if (PtrI) {
    setUniformMetadata(PtrI);
    if (NotClobbered)
      setNoClobberMetadata(PtrI);
  }
}

bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}

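// Gather the required analyses and visit every instruction in the function;
// the InstVisitor callbacks above do the actual annotation. The
// noClobberClones map is cleared before moving on to the next function.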
bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DA = &getAnalysis<LegacyDivergenceAnalysis>();
  MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;

  visit(F);
  noClobberClones.clear();
  return true;
}

FunctionPass *
llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}