Line data Source code
1 : //===-- AMDGPUAlwaysInlinePass.cpp - Inline All Functions -----------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : /// This pass replaces uses of function aliases with the aliasee and marks
12 : /// functions always_inline (or noinline under amdgpu-stress-function-calls).
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "AMDGPU.h"
17 : #include "AMDGPUTargetMachine.h"
18 : #include "Utils/AMDGPUBaseInfo.h"
19 : #include "llvm/ADT/SmallPtrSet.h"
20 : #include "llvm/IR/Module.h"
21 : #include "llvm/Transforms/Utils/Cloning.h"
22 :
23 : using namespace llvm;
24 :
25 : namespace {
26 :
27 : static cl::opt<bool> StressCalls(
28 : "amdgpu-stress-function-calls",
29 : cl::Hidden,
30 : cl::desc("Force all functions to be noinline"),
31 : cl::init(false));
32 :
33 : class AMDGPUAlwaysInline : public ModulePass {
34 : bool GlobalOpt;
35 :
36 : void recursivelyVisitUsers(GlobalValue &GV,
37 : SmallPtrSetImpl<Function *> &FuncsToAlwaysInline);
38 : public:
39 : static char ID;
40 :
41 2250 : AMDGPUAlwaysInline(bool GlobalOpt = false) :
42 2250 : ModulePass(ID), GlobalOpt(GlobalOpt) { }
43 : bool runOnModule(Module &M) override;
44 :
45 2233 : void getAnalysisUsage(AnalysisUsage &AU) const override {
46 : AU.setPreservesAll();
47 2233 : }
48 : };
49 :
50 : } // End anonymous namespace
51 :
52 199024 : INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
53 : "AMDGPU Inline All Functions", false, false)
54 :
55 : char AMDGPUAlwaysInline::ID = 0;
56 :
57 0 : void AMDGPUAlwaysInline::recursivelyVisitUsers(
58 : GlobalValue &GV,
59 : SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
60 : SmallVector<User *, 16> Stack;
61 :
62 : SmallPtrSet<const Value *, 8> Visited;
63 :
64 0 : for (User *U : GV.users())
65 0 : Stack.push_back(U);
66 :
67 0 : while (!Stack.empty()) {
68 : User *U = Stack.pop_back_val();
69 0 : if (!Visited.insert(U).second)
70 0 : continue;
71 :
72 : if (Instruction *I = dyn_cast<Instruction>(U)) {
73 0 : Function *F = I->getParent()->getParent();
74 0 : if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
75 0 : FuncsToAlwaysInline.insert(F);
76 0 : Stack.push_back(F);
77 : }
78 :
79 : // No need to look at further users, but we do need to inline any callers.
80 0 : continue;
81 : }
82 :
83 0 : for (User *UU : U->users())
84 0 : Stack.push_back(UU);
85 : }
86 0 : }
87 :
88 2232 : bool AMDGPUAlwaysInline::runOnModule(Module &M) {
89 : std::vector<GlobalAlias*> AliasesToRemove;
90 :
91 : SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
92 : SmallPtrSet<Function *, 8> FuncsToNoInline;
93 :
94 2241 : for (GlobalAlias &A : M.aliases()) {
95 : if (Function* F = dyn_cast<Function>(A.getAliasee())) {
96 7 : A.replaceAllUsesWith(F);
97 7 : AliasesToRemove.push_back(&A);
98 : }
99 :
100 : // FIXME: If the aliasee isn't a function, it's some kind of constant expr
101 : // cast that won't be inlined through.
102 : }
103 :
104 2232 : if (GlobalOpt) {
105 2234 : for (GlobalAlias* A : AliasesToRemove) {
106 6 : A->eraseFromParent();
107 : }
108 : }
109 :
110 : // Always force inlining of any function that uses an LDS global address. This
111 : // is something of a workaround because we don't have a way of supporting LDS
112 : // objects defined in functions. LDS is always allocated by a kernel, and it
113 : // is difficult to manage LDS usage if a function may be used by multiple
114 : // kernels.
115 : //
116 : // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
117 : // should only appear when IPO passes manages to move LDs defined in a kernel
118 : // into a single user function.
119 :
120 2545 : for (GlobalVariable &GV : M.globals()) {
121 : // TODO: Region address
122 : unsigned AS = GV.getType()->getAddressSpace();
123 313 : if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
124 : continue;
125 :
126 175 : recursivelyVisitUsers(GV, FuncsToAlwaysInline);
127 : }
128 :
129 2232 : if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
130 : auto IncompatAttr
131 2231 : = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
132 :
133 28235 : for (Function &F : M) {
134 26187 : if (!F.isDeclaration() && !F.use_empty() &&
135 : !F.hasFnAttribute(IncompatAttr)) {
136 53 : if (StressCalls) {
137 6 : if (!FuncsToAlwaysInline.count(&F))
138 2 : FuncsToNoInline.insert(&F);
139 : } else
140 47 : FuncsToAlwaysInline.insert(&F);
141 : }
142 : }
143 : }
144 :
145 2302 : for (Function *F : FuncsToAlwaysInline)
146 : F->addFnAttr(Attribute::AlwaysInline);
147 :
148 2234 : for (Function *F : FuncsToNoInline)
149 : F->addFnAttr(Attribute::NoInline);
150 :
151 2232 : return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
152 : }
153 :
154 2247 : ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
155 2247 : return new AMDGPUAlwaysInline(GlobalOpt);
156 : }
157 :
|