File: | lib/Transforms/Scalar/TailRecursionElimination.cpp |
Location: | line 597, column 14 |
Description: | Called C++ object pointer is null |
1 | //===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file transforms calls of the current function (self recursion) followed | |||
11 | // by a return instruction with a branch to the entry of the function, creating | |||
12 | // a loop. This pass also implements the following extensions to the basic | |||
13 | // algorithm: | |||
14 | // | |||
15 | // 1. Trivial instructions between the call and return do not prevent the | |||
16 | // transformation from taking place, though currently the analysis cannot | |||
17 | // support moving any really useful instructions (only dead ones). | |||
18 | // 2. This pass transforms functions that are prevented from being tail | |||
19 | // recursive by an associative and commutative expression to use an | |||
20 | // accumulator variable, thus compiling the typical naive factorial or | |||
21 | // 'fib' implementation into efficient code. | |||
22 | // 3. TRE is performed if the function returns void, if the return | |||
23 | // returns the result returned by the call, or if the function returns a | |||
24 | // run-time constant on all exits from the function. It is possible, though | |||
25 | // unlikely, that the return returns something else (like constant 0), and | |||
26 | // can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in | |||
27 | // the function return the exact same value. | |||
28 | // 4. If it can prove that callees do not access their caller stack frame, | |||
29 | // they are marked as eligible for tail call elimination (by the code | |||
30 | // generator). | |||
31 | // | |||
32 | // There are several improvements that could be made: | |||
33 | // | |||
34 | // 1. If the function has any alloca instructions, these instructions will be | |||
35 | // moved out of the entry block of the function, causing them to be | |||
36 | // evaluated each time through the tail recursion. Safely keeping allocas | |||
37 | // in the entry block requires analysis to proves that the tail-called | |||
38 | // function does not read or write the stack object. | |||
39 | // 2. Tail recursion is only performed if the call immediately precedes the | |||
40 | // return instruction. It's possible that there could be a jump between | |||
41 | // the call and the return. | |||
42 | // 3. There can be intervening operations between the call and the return that | |||
43 | // prevent the TRE from occurring. For example, there could be GEP's and | |||
44 | // stores to memory that will not be read or written by the call. This | |||
45 | // requires some substantial analysis (such as with DSA) to prove safe to | |||
46 | // move ahead of the call, but doing so could allow many more TREs to be | |||
47 | // performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark. | |||
48 | // 4. The algorithm we use to detect if callees access their caller stack | |||
49 | // frames is very primitive. | |||
50 | // | |||
51 | //===----------------------------------------------------------------------===// | |||
52 | ||||
53 | #include "llvm/Transforms/Scalar.h" | |||
54 | #include "llvm/ADT/STLExtras.h" | |||
55 | #include "llvm/ADT/SmallPtrSet.h" | |||
56 | #include "llvm/ADT/Statistic.h" | |||
57 | #include "llvm/Analysis/GlobalsModRef.h" | |||
58 | #include "llvm/Analysis/CFG.h" | |||
59 | #include "llvm/Analysis/CaptureTracking.h" | |||
60 | #include "llvm/Analysis/InlineCost.h" | |||
61 | #include "llvm/Analysis/InstructionSimplify.h" | |||
62 | #include "llvm/Analysis/Loads.h" | |||
63 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
64 | #include "llvm/IR/CFG.h" | |||
65 | #include "llvm/IR/CallSite.h" | |||
66 | #include "llvm/IR/Constants.h" | |||
67 | #include "llvm/IR/DataLayout.h" | |||
68 | #include "llvm/IR/DerivedTypes.h" | |||
69 | #include "llvm/IR/DiagnosticInfo.h" | |||
70 | #include "llvm/IR/Function.h" | |||
71 | #include "llvm/IR/Instructions.h" | |||
72 | #include "llvm/IR/IntrinsicInst.h" | |||
73 | #include "llvm/IR/Module.h" | |||
74 | #include "llvm/IR/ValueHandle.h" | |||
75 | #include "llvm/Pass.h" | |||
76 | #include "llvm/Support/Debug.h" | |||
77 | #include "llvm/Support/raw_ostream.h" | |||
78 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
79 | #include "llvm/Transforms/Utils/Local.h" | |||
80 | using namespace llvm; | |||
81 | ||||
82 | #define DEBUG_TYPE"tailcallelim" "tailcallelim" | |||
83 | ||||
84 | STATISTIC(NumEliminated, "Number of tail calls removed")static llvm::Statistic NumEliminated = { "tailcallelim", "Number of tail calls removed" , 0, 0 }; | |||
85 | STATISTIC(NumRetDuped, "Number of return duplicated")static llvm::Statistic NumRetDuped = { "tailcallelim", "Number of return duplicated" , 0, 0 }; | |||
86 | STATISTIC(NumAccumAdded, "Number of accumulators introduced")static llvm::Statistic NumAccumAdded = { "tailcallelim", "Number of accumulators introduced" , 0, 0 }; | |||
87 | ||||
88 | namespace { | |||
89 | struct TailCallElim : public FunctionPass { | |||
90 | const TargetTransformInfo *TTI; | |||
91 | ||||
92 | static char ID; // Pass identification, replacement for typeid | |||
93 | TailCallElim() : FunctionPass(ID) { | |||
94 | initializeTailCallElimPass(*PassRegistry::getPassRegistry()); | |||
95 | } | |||
96 | ||||
97 | void getAnalysisUsage(AnalysisUsage &AU) const override; | |||
98 | ||||
99 | bool runOnFunction(Function &F) override; | |||
100 | ||||
101 | private: | |||
102 | bool runTRE(Function &F); | |||
103 | bool markTails(Function &F, bool &AllCallsAreTailCalls); | |||
104 | ||||
105 | CallInst *FindTRECandidate(Instruction *I, | |||
106 | bool CannotTailCallElimCallsMarkedTail); | |||
107 | bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, | |||
108 | BasicBlock *&OldEntry, | |||
109 | bool &TailCallsAreMarkedTail, | |||
110 | SmallVectorImpl<PHINode *> &ArgumentPHIs, | |||
111 | bool CannotTailCallElimCallsMarkedTail); | |||
112 | bool FoldReturnAndProcessPred(BasicBlock *BB, | |||
113 | ReturnInst *Ret, BasicBlock *&OldEntry, | |||
114 | bool &TailCallsAreMarkedTail, | |||
115 | SmallVectorImpl<PHINode *> &ArgumentPHIs, | |||
116 | bool CannotTailCallElimCallsMarkedTail); | |||
117 | bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry, | |||
118 | bool &TailCallsAreMarkedTail, | |||
119 | SmallVectorImpl<PHINode *> &ArgumentPHIs, | |||
120 | bool CannotTailCallElimCallsMarkedTail); | |||
121 | bool CanMoveAboveCall(Instruction *I, CallInst *CI); | |||
122 | Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI); | |||
123 | }; | |||
124 | } | |||
125 | ||||
126 | char TailCallElim::ID = 0; | |||
127 | INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",static void* initializeTailCallElimPassOnce(PassRegistry & Registry) { | |||
128 | "Tail Call Elimination", false, false)static void* initializeTailCallElimPassOnce(PassRegistry & Registry) { | |||
129 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | |||
130 | INITIALIZE_PASS_END(TailCallElim, "tailcallelim",PassInfo *PI = new PassInfo("Tail Call Elimination", "tailcallelim" , & TailCallElim ::ID, PassInfo::NormalCtor_t(callDefaultCtor < TailCallElim >), false, false); Registry.registerPass (*PI, true); return PI; } void llvm::initializeTailCallElimPass (PassRegistry &Registry) { static volatile sys::cas_flag initialized = 0; sys::cas_flag old_val = sys::CompareAndSwap(&initialized , 1, 0); if (old_val == 0) { initializeTailCallElimPassOnce(Registry ); sys::MemoryFence(); ; ; initialized = 2; ; } else { sys::cas_flag tmp = initialized; sys::MemoryFence(); while (tmp != 2) { tmp = initialized; sys::MemoryFence(); } } ; } | |||
131 | "Tail Call Elimination", false, false)PassInfo *PI = new PassInfo("Tail Call Elimination", "tailcallelim" , & TailCallElim ::ID, PassInfo::NormalCtor_t(callDefaultCtor < TailCallElim >), false, false); Registry.registerPass (*PI, true); return PI; } void llvm::initializeTailCallElimPass (PassRegistry &Registry) { static volatile sys::cas_flag initialized = 0; sys::cas_flag old_val = sys::CompareAndSwap(&initialized , 1, 0); if (old_val == 0) { initializeTailCallElimPassOnce(Registry ); sys::MemoryFence(); ; ; initialized = 2; ; } else { sys::cas_flag tmp = initialized; sys::MemoryFence(); while (tmp != 2) { tmp = initialized; sys::MemoryFence(); } } ; } | |||
132 | ||||
133 | // Public interface to the TailCallElimination pass | |||
134 | FunctionPass *llvm::createTailCallEliminationPass() { | |||
135 | return new TailCallElim(); | |||
136 | } | |||
137 | ||||
138 | void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { | |||
139 | AU.addRequired<TargetTransformInfoWrapperPass>(); | |||
140 | AU.addPreserved<GlobalsAAWrapperPass>(); | |||
141 | } | |||
142 | ||||
143 | /// \brief Scan the specified function for alloca instructions. | |||
144 | /// If it contains any dynamic allocas, returns false. | |||
145 | static bool CanTRE(Function &F) { | |||
146 | // Because of PR962, we don't TRE dynamic allocas. | |||
147 | for (auto &BB : F) { | |||
148 | for (auto &I : BB) { | |||
149 | if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { | |||
150 | if (!AI->isStaticAlloca()) | |||
151 | return false; | |||
152 | } | |||
153 | } | |||
154 | } | |||
155 | ||||
156 | return true; | |||
157 | } | |||
158 | ||||
159 | bool TailCallElim::runOnFunction(Function &F) { | |||
160 | if (skipFunction(F)) | |||
| ||||
161 | return false; | |||
162 | ||||
163 | if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") | |||
164 | return false; | |||
165 | ||||
166 | bool AllCallsAreTailCalls = false; | |||
167 | bool Modified = markTails(F, AllCallsAreTailCalls); | |||
168 | if (AllCallsAreTailCalls) | |||
169 | Modified |= runTRE(F); | |||
170 | return Modified; | |||
171 | } | |||
172 | ||||
namespace {
// Use-def walker that classifies everything reachable from a stack root
// (an alloca or a byval argument): calls touching it land in AllocaUsers,
// and instructions that may leak it land in EscapePoints.
struct AllocaDerivedValueTracker {
  // Start at a root value and walk its use-def chain to mark calls that use the
  // value or a derived value in AllocaUsers, and places where it may escape in
  // EscapePoints.
  void walk(Value *Root) {
    SmallVector<Use *, 32> Worklist;
    SmallPtrSet<Use *, 32> Visited;

    // Enqueue every not-yet-seen use of V. Visiting Uses (not Values) lets
    // the same instruction be examined once per alloca-derived operand.
    auto AddUsesToWorklist = [&](Value *V) {
      for (auto &U : V->uses()) {
        if (!Visited.insert(&U).second)
          continue;
        Worklist.push_back(&U);
      }
    };

    AddUsesToWorklist(Root);

    while (!Worklist.empty()) {
      Use *U = Worklist.pop_back_val();
      Instruction *I = cast<Instruction>(U->getUser());

      switch (I->getOpcode()) {
      case Instruction::Call:
      case Instruction::Invoke: {
        CallSite CS(I);
        // Nocapture only applies to data operands, not e.g. the callee slot.
        bool IsNocapture =
            CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
        callUsesLocalStack(CS, IsNocapture);
        if (IsNocapture) {
          // If the alloca-derived argument is passed in as nocapture, then it
          // can't propagate to the call's return. That would be capturing.
          continue;
        }
        break; // Return value may be alloca-derived: keep walking its uses.
      }
      case Instruction::Load: {
        // The result of a load is not alloca-derived (unless an alloca has
        // otherwise escaped, but this is a local analysis).
        continue;
      }
      case Instruction::Store: {
        // Storing the pointer itself (operand 0) leaks it; storing *through*
        // it (operand 1) does not.
        if (U->getOperandNo() == 0)
          EscapePoints.insert(I);
        continue; // Stores have no users to analyze.
      }
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::PHI:
      case Instruction::Select:
      case Instruction::AddrSpaceCast:
        // Pointer-preserving operations: the result is still alloca-derived.
        break;
      default:
        // Conservatively treat any other use as an escape.
        EscapePoints.insert(I);
        break;
      }

      AddUsesToWorklist(I);
    }
  }

  // Record that call site CS received an alloca-derived value; if the value
  // isn't nocapture and the callee may write memory, it may also escape there.
  void callUsesLocalStack(CallSite CS, bool IsNocapture) {
    // Add it to the list of alloca users.
    AllocaUsers.insert(CS.getInstruction());

    // If it's nocapture then it can't capture this alloca.
    if (IsNocapture)
      return;

    // If it can write to memory, it can leak the alloca value.
    if (!CS.onlyReadsMemory())
      EscapePoints.insert(CS.getInstruction());
  }

  SmallPtrSet<Instruction *, 32> AllocaUsers;   // calls using alloca-derived values
  SmallPtrSet<Instruction *, 32> EscapePoints;  // instructions that may leak them
};
}
252 | ||||
/// Mark every call that provably cannot observe the caller's stack frame with
/// the 'tail' attribute. Sets \p AllCallsAreTailCalls to false if any call had
/// to be left unmarked. Returns true if the function was modified.
bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
  // setjmp-style callers can resume past a marked call; bail out entirely.
  if (F.callsFunctionThatReturnsTwice())
    return false;
  AllCallsAreTailCalls = true;

  // The local stack holds all alloca instructions and all byval arguments.
  AllocaDerivedValueTracker Tracker;
  for (Argument &Arg : F.args()) {
    if (Arg.hasByValAttr())
      Tracker.walk(&Arg);
  }
  for (auto &BB : F) {
    for (auto &I : BB)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
        Tracker.walk(AI);
  }

  bool Modified = false;

  // Track whether a block is reachable after an alloca has escaped. Blocks that
  // contain the escaping instruction will be marked as being visited without an
  // escaped alloca, since that is how the block began.
  enum VisitType {
    UNVISITED,
    UNESCAPED,
    ESCAPED
  };
  DenseMap<BasicBlock *, VisitType> Visited;

  // We propagate the fact that an alloca has escaped from block to successor.
  // Visit the blocks that are propagating the escapedness first. To do this, we
  // maintain two worklists.
  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;

  // We may enter a block and visit it thinking that no alloca has escaped yet,
  // then see an escape point and go back around a loop edge and come back to
  // the same block twice. Because of this, we defer setting tail on calls when
  // we first encounter them in a block. Every entry in this list does not
  // statically use an alloca via use-def chain analysis, but may find an alloca
  // through other means if the block turns out to be reachable after an escape
  // point.
  SmallVector<CallInst *, 32> DeferredTails;

  BasicBlock *BB = &F.getEntryBlock();
  VisitType Escaped = UNESCAPED;
  do {
    for (auto &I : *BB) {
      // Escapedness flips mid-block: calls after this point in BB see ESCAPED.
      if (Tracker.EscapePoints.count(&I))
        Escaped = ESCAPED;

      CallInst *CI = dyn_cast<CallInst>(&I);
      if (!CI || CI->isTailCall())
        continue;

      bool IsNoTail = CI->isNoTailCall();

      if (!IsNoTail && CI->doesNotAccessMemory()) {
        // A call to a readnone function whose arguments are all things computed
        // outside this function can be marked tail. Even if you stored the
        // alloca address into a global, a readnone function can't load the
        // global anyhow.
        //
        // Note that this runs whether we know an alloca has escaped or not. If
        // it has, then we can't trust Tracker.AllocaUsers to be accurate.
        bool SafeToTail = true;
        for (auto &Arg : CI->arg_operands()) {
          if (isa<Constant>(Arg.getUser()))
            continue;
          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
            if (!A->hasByValAttr())
              continue;
          SafeToTail = false;
          break;
        }
        if (SafeToTail) {
          emitOptimizationRemark(
              F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
              "marked this readnone call a tail call candidate");
          CI->setTailCall();
          Modified = true;
          continue;
        }
      }

      // Defer marking: a later escape reaching this block via a back edge may
      // still invalidate it. Calls that touch a live alloca can never be tail.
      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
        DeferredTails.push_back(CI);
      } else {
        AllCallsAreTailCalls = false;
      }
    }

    // Propagate our escape state to successors; only re-queue a block when its
    // state strictly increases (UNVISITED < UNESCAPED < ESCAPED).
    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
      auto &State = Visited[SuccBB];
      if (State < Escaped) {
        State = Escaped;
        if (State == ESCAPED)
          WorklistEscaped.push_back(SuccBB);
        else
          WorklistUnescaped.push_back(SuccBB);
      }
    }

    // Drain ESCAPED blocks first so UNESCAPED entries that were upgraded in
    // the meantime can be skipped below.
    if (!WorklistEscaped.empty()) {
      BB = WorklistEscaped.pop_back_val();
      Escaped = ESCAPED;
    } else {
      BB = nullptr;
      while (!WorklistUnescaped.empty()) {
        auto *NextBB = WorklistUnescaped.pop_back_val();
        if (Visited[NextBB] == UNESCAPED) {
          BB = NextBB;
          Escaped = UNESCAPED;
          break;
        }
      }
    }
  } while (BB);

  // Commit the deferred marks for blocks never reached in an ESCAPED state.
  for (CallInst *CI : DeferredTails) {
    if (Visited[CI->getParent()] != ESCAPED) {
      // If the escape point was part way through the block, calls after the
      // escape point wouldn't have been put into DeferredTails.
      emitOptimizationRemark(F.getContext(), "tailcallelim", F,
                             CI->getDebugLoc(),
                             "marked this call a tail call candidate");
      CI->setTailCall();
      Modified = true;
    } else {
      AllCallsAreTailCalls = false;
    }
  }

  return Modified;
}
387 | ||||
/// Convert self-recursive tail calls in F into loops. Returns true if the
/// function was changed. Initializes the TTI member used by FindTRECandidate.
bool TailCallElim::runTRE(Function &F) {
  // If this function is a varargs function, we won't be able to PHI the args
  // right, so don't even try to convert it...
  if (F.getFunctionType()->isVarArg()) return false;

  // NOTE: TTI is null until this point; FindTRECandidate relies on it.
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  BasicBlock *OldEntry = nullptr;
  bool TailCallsAreMarkedTail = false;
  SmallVector<PHINode*, 8> ArgumentPHIs;
  bool MadeChange = false;

  // If false, we cannot perform TRE on tail calls marked with the 'tail'
  // attribute, because doing so would cause the stack size to increase (real
  // TRE would deallocate variable sized allocas, TRE doesn't).
  bool CanTRETailMarkedCall = CanTRE(F);

  // Change any tail recursive calls to loops.
  //
  // FIXME: The code generator produces really bad code when an 'escaping
  // alloca' is changed from being a static alloca to being a dynamic alloca.
  // Until this is resolved, disable this transformation if that would ever
  // happen. This bug is PR962.
  for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
    BasicBlock *BB = &*BBI++; // FoldReturnAndProcessPred may delete BB.
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
      bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
                                          ArgumentPHIs, !CanTRETailMarkedCall);
      // If the block is just a return (ignoring PHIs/debug), try folding it
      // into predecessors that end in a recursive call.
      if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
        Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
                                          TailCallsAreMarkedTail, ArgumentPHIs,
                                          !CanTRETailMarkedCall);
      MadeChange |= Change;
    }
  }

  // If we eliminated any tail recursions, it's possible that we inserted some
  // silly PHI nodes which just merge an initial value (the incoming operand)
  // with themselves. Check to see if we did and clean up our mess if so. This
  // occurs when a function passes an argument straight through to its tail
  // call.
  for (PHINode *PN : ArgumentPHIs) {
    // If the PHI Node is a dynamic constant, replace it with the value it is.
    if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
      PN->replaceAllUsesWith(PNV);
      PN->eraseFromParent();
    }
  }

  return MadeChange;
}
438 | ||||
439 | ||||
440 | /// Return true if it is safe to move the specified | |||
441 | /// instruction from after the call to before the call, assuming that all | |||
442 | /// instructions between the call and this instruction are movable. | |||
443 | /// | |||
444 | bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { | |||
445 | // FIXME: We can move load/store/call/free instructions above the call if the | |||
446 | // call does not mod/ref the memory location being processed. | |||
447 | if (I->mayHaveSideEffects()) // This also handles volatile loads. | |||
448 | return false; | |||
449 | ||||
450 | if (LoadInst *L = dyn_cast<LoadInst>(I)) { | |||
451 | // Loads may always be moved above calls without side effects. | |||
452 | if (CI->mayHaveSideEffects()) { | |||
453 | // Non-volatile loads may be moved above a call with side effects if it | |||
454 | // does not write to memory and the load provably won't trap. | |||
455 | // FIXME: Writes to memory only matter if they may alias the pointer | |||
456 | // being loaded from. | |||
457 | const DataLayout &DL = L->getModule()->getDataLayout(); | |||
458 | if (CI->mayWriteToMemory() || | |||
459 | !isSafeToLoadUnconditionally(L->getPointerOperand(), | |||
460 | L->getAlignment(), DL, L)) | |||
461 | return false; | |||
462 | } | |||
463 | } | |||
464 | ||||
465 | // Otherwise, if this is a side-effect free instruction, check to make sure | |||
466 | // that it does not use the return value of the call. If it doesn't use the | |||
467 | // return value of the call, it must only use things that are defined before | |||
468 | // the call, or movable instructions between the call and the instruction | |||
469 | // itself. | |||
470 | return std::find(I->op_begin(), I->op_end(), CI) == I->op_end(); | |||
471 | } | |||
472 | ||||
473 | /// Return true if the specified value is the same when the return would exit | |||
474 | /// as it was when the initial iteration of the recursive function was executed. | |||
475 | /// | |||
476 | /// We currently handle static constants and arguments that are not modified as | |||
477 | /// part of the recursion. | |||
478 | static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { | |||
479 | if (isa<Constant>(V)) return true; // Static constants are always dyn consts | |||
480 | ||||
481 | // Check to see if this is an immutable argument, if so, the value | |||
482 | // will be available to initialize the accumulator. | |||
483 | if (Argument *Arg = dyn_cast<Argument>(V)) { | |||
484 | // Figure out which argument number this is... | |||
485 | unsigned ArgNo = 0; | |||
486 | Function *F = CI->getParent()->getParent(); | |||
487 | for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI) | |||
488 | ++ArgNo; | |||
489 | ||||
490 | // If we are passing this argument into call as the corresponding | |||
491 | // argument operand, then the argument is dynamically constant. | |||
492 | // Otherwise, we cannot transform this function safely. | |||
493 | if (CI->getArgOperand(ArgNo) == Arg) | |||
494 | return true; | |||
495 | } | |||
496 | ||||
497 | // Switch cases are always constant integers. If the value is being switched | |||
498 | // on and the return is only reachable from one of its cases, it's | |||
499 | // effectively constant. | |||
500 | if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor()) | |||
501 | if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator())) | |||
502 | if (SI->getCondition() == V) | |||
503 | return SI->getDefaultDest() != RI->getParent(); | |||
504 | ||||
505 | // Not a constant or immutable argument, we can't safely transform. | |||
506 | return false; | |||
507 | } | |||
508 | ||||
509 | /// Check to see if the function containing the specified tail call consistently | |||
510 | /// returns the same runtime-constant value at all exit points except for | |||
511 | /// IgnoreRI. If so, return the returned value. | |||
512 | static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { | |||
513 | Function *F = CI->getParent()->getParent(); | |||
514 | Value *ReturnedValue = nullptr; | |||
515 | ||||
516 | for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { | |||
517 | ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); | |||
518 | if (RI == nullptr || RI == IgnoreRI) continue; | |||
519 | ||||
520 | // We can only perform this transformation if the value returned is | |||
521 | // evaluatable at the start of the initial invocation of the function, | |||
522 | // instead of at the end of the evaluation. | |||
523 | // | |||
524 | Value *RetOp = RI->getOperand(0); | |||
525 | if (!isDynamicConstant(RetOp, CI, RI)) | |||
526 | return nullptr; | |||
527 | ||||
528 | if (ReturnedValue && RetOp != ReturnedValue) | |||
529 | return nullptr; // Cannot transform if differing values are returned. | |||
530 | ReturnedValue = RetOp; | |||
531 | } | |||
532 | return ReturnedValue; | |||
533 | } | |||
534 | ||||
535 | /// If the specified instruction can be transformed using accumulator recursion | |||
536 | /// elimination, return the constant which is the start of the accumulator | |||
537 | /// value. Otherwise return null. | |||
538 | Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, | |||
539 | CallInst *CI) { | |||
540 | if (!I->isAssociative() || !I->isCommutative()) return nullptr; | |||
541 | assert(I->getNumOperands() == 2 &&((I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!" ) ? static_cast<void> (0) : __assert_fail ("I->getNumOperands() == 2 && \"Associative/commutative operations should have 2 args!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Transforms/Scalar/TailRecursionElimination.cpp" , 542, __PRETTY_FUNCTION__)) | |||
542 | "Associative/commutative operations should have 2 args!")((I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!" ) ? static_cast<void> (0) : __assert_fail ("I->getNumOperands() == 2 && \"Associative/commutative operations should have 2 args!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/Transforms/Scalar/TailRecursionElimination.cpp" , 542, __PRETTY_FUNCTION__)); | |||
543 | ||||
544 | // Exactly one operand should be the result of the call instruction. | |||
545 | if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || | |||
546 | (I->getOperand(0) != CI && I->getOperand(1) != CI)) | |||
547 | return nullptr; | |||
548 | ||||
549 | // The only user of this instruction we allow is a single return instruction. | |||
550 | if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back())) | |||
551 | return nullptr; | |||
552 | ||||
553 | // Ok, now we have to check all of the other return instructions in this | |||
554 | // function. If they return non-constants or differing values, then we cannot | |||
555 | // transform the function safely. | |||
556 | return getCommonReturnValue(cast<ReturnInst>(I->user_back()), CI); | |||
557 | } | |||
558 | ||||
559 | static Instruction *FirstNonDbg(BasicBlock::iterator I) { | |||
560 | while (isa<DbgInfoIntrinsic>(I)) | |||
561 | ++I; | |||
562 | return &*I; | |||
563 | } | |||
564 | ||||
565 | CallInst* | |||
566 | TailCallElim::FindTRECandidate(Instruction *TI, | |||
567 | bool CannotTailCallElimCallsMarkedTail) { | |||
568 | BasicBlock *BB = TI->getParent(); | |||
569 | Function *F = BB->getParent(); | |||
570 | ||||
571 | if (&BB->front() == TI) // Make sure there is something before the terminator. | |||
572 | return nullptr; | |||
573 | ||||
574 | // Scan backwards from the return, checking to see if there is a tail call in | |||
575 | // this block. If so, set CI to it. | |||
576 | CallInst *CI = nullptr; | |||
577 | BasicBlock::iterator BBI(TI); | |||
578 | while (true) { | |||
579 | CI = dyn_cast<CallInst>(BBI); | |||
580 | if (CI && CI->getCalledFunction() == F) | |||
581 | break; | |||
582 | ||||
583 | if (BBI == BB->begin()) | |||
584 | return nullptr; // Didn't find a potential tail call. | |||
585 | --BBI; | |||
586 | } | |||
587 | ||||
588 | // If this call is marked as a tail call, and if there are dynamic allocas in | |||
589 | // the function, we cannot perform this optimization. | |||
590 | if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) | |||
591 | return nullptr; | |||
592 | ||||
593 | // As a special case, detect code like this: | |||
594 | // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call | |||
595 | // and disable this xform in this case, because the code generator will | |||
596 | // lower the call to fabs into inline code. | |||
597 | if (BB == &F->getEntryBlock() && | |||
| ||||
598 | FirstNonDbg(BB->front().getIterator()) == CI && | |||
599 | FirstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() && | |||
600 | !TTI->isLoweredToCall(CI->getCalledFunction())) { | |||
601 | // A single-block function with just a call and a return. Check that | |||
602 | // the arguments match. | |||
603 | CallSite::arg_iterator I = CallSite(CI).arg_begin(), | |||
604 | E = CallSite(CI).arg_end(); | |||
605 | Function::arg_iterator FI = F->arg_begin(), | |||
606 | FE = F->arg_end(); | |||
607 | for (; I != E && FI != FE; ++I, ++FI) | |||
608 | if (*I != &*FI) break; | |||
609 | if (I == E && FI == FE) | |||
610 | return nullptr; | |||
611 | } | |||
612 | ||||
613 | return CI; | |||
614 | } | |||
615 | ||||
/// Attempt to eliminate the recursive tail call CI, which is immediately
/// followed (modulo movable instructions) by the return Ret.  On success the
/// call/ret pair is replaced with a branch back to the (possibly newly
/// created) loop header, argument PHIs are updated, and true is returned.
/// Returns false without modifying the IR if the call cannot be eliminated.
///
/// \param CI   the self-recursive tail call candidate.
/// \param Ret  the return instruction that follows the call.
/// \param OldEntry  in/out: the original entry block once a loop header has
///        been created; null until the first successful elimination.
/// \param TailCallsAreMarkedTail  in/out: whether eliminated calls carried the
///        'tail' marker; mixing marked and unmarked calls is rejected below.
/// \param ArgumentPHIs  PHIs created in the loop header, one per argument.
/// \param CannotTailCallElimCallsMarkedTail  unused here; forwarded context
///        flag (candidate screening happens in FindTRECandidate).
bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
                                       BasicBlock *&OldEntry,
                                       bool &TailCallsAreMarkedTail,
                                       SmallVectorImpl<PHINode *> &ArgumentPHIs,
                                       bool CannotTailCallElimCallsMarkedTail) {
  // If we are introducing accumulator recursion to eliminate operations after
  // the call instruction that are both associative and commutative, the initial
  // value for the accumulator is placed in this variable. If this value is set
  // then we actually perform accumulator recursion elimination instead of
  // simple tail recursion elimination. If the operation is an LLVM instruction
  // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then
  // we are handling the case when the return instruction returns a constant C
  // which is different to the constant returned by other return instructions
  // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
  // special case of accumulator recursion, the operation being "return C".
  Value *AccumulatorRecursionEliminationInitVal = nullptr;
  Instruction *AccumulatorRecursionInstr = nullptr;

  // Ok, we found a potential tail call.  We can currently only transform the
  // tail call if all of the instructions between the call and the return are
  // movable to above the call itself, leaving the call next to the return.
  // Check that this is the case now.
  BasicBlock::iterator BBI(CI);
  for (++BBI; &*BBI != Ret; ++BBI) {
    if (CanMoveAboveCall(&*BBI, CI)) continue;

    // If we can't move the instruction above the call, it might be because it
    // is an associative and commutative operation that could be transformed
    // using accumulator recursion elimination.  Check to see if this is the
    // case, and if so, remember the initial accumulator value for later.
    if ((AccumulatorRecursionEliminationInitVal =
             CanTransformAccumulatorRecursion(&*BBI, CI))) {
      // Yes, this is accumulator recursion.  Remember which instruction
      // accumulates.
      AccumulatorRecursionInstr = &*BBI;
    } else {
      return false;   // Otherwise, we cannot eliminate the tail recursion!
    }
  }

  // We can only transform call/return pairs that either ignore the return value
  // of the call and return void, ignore the value of the call and return a
  // constant, return the value returned by the tail call, or that are being
  // accumulator recursion variable eliminated.
  if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
      !isa<UndefValue>(Ret->getReturnValue()) &&
      AccumulatorRecursionEliminationInitVal == nullptr &&
      !getCommonReturnValue(nullptr, CI)) {
    // One case remains that we are able to handle: the current return
    // instruction returns a constant, and all other return instructions
    // return a different constant.
    if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
      return false; // Current return instruction does not return a constant.
    // Check that all other return instructions return a common constant.  If
    // so, record it in AccumulatorRecursionEliminationInitVal.
    AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
    if (!AccumulatorRecursionEliminationInitVal)
      return false;
  }

  BasicBlock *BB = Ret->getParent();
  Function *F = BB->getParent();

  emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
                         "transforming tail recursion to loop");

  // OK! We can transform this tail call.  If this is the first one found,
  // create the new entry block, allowing us to branch back to the old entry.
  if (!OldEntry) {
    OldEntry = &F->getEntryBlock();
    BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
    NewEntry->takeName(OldEntry);
    OldEntry->setName("tailrecurse");
    BranchInst::Create(OldEntry, NewEntry);

    // If this tail call is marked 'tail' and if there are any allocas in the
    // entry block, move them up to the new entry block.
    TailCallsAreMarkedTail = CI->isTailCall();
    if (TailCallsAreMarkedTail)
      // Move all fixed sized allocas from OldEntry to NewEntry.
      // (Only constant-size allocas are hoisted; dynamic ones stay put.)
      for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
             NEBI = NewEntry->begin(); OEBI != E; )
        if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
          if (isa<ConstantInt>(AI->getArraySize()))
            AI->moveBefore(&*NEBI);

    // Now that we have created a new block, which jumps to the entry
    // block, insert a PHI node for each argument of the function.
    // For now, we initialize each PHI to only have the real arguments
    // which are passed in.
    Instruction *InsertPos = &OldEntry->front();
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I) {
      PHINode *PN = PHINode::Create(I->getType(), 2,
                                    I->getName() + ".tr", InsertPos);
      I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
      PN->addIncoming(&*I, NewEntry);
      ArgumentPHIs.push_back(PN);
    }
  }

  // If this function has self recursive calls in the tail position where some
  // are marked tail and some are not, only transform one flavor or another.  We
  // have to choose whether we move allocas in the entry block to the new entry
  // block or not, so we can't make a good choice for both.  NOTE: We could do
  // slightly better here in the case that the function has no entry block
  // allocas.
  if (TailCallsAreMarkedTail && !CI->isTailCall())
    return false;

  // Ok, now that we know we have a pseudo-entry block WITH all of the
  // required PHI nodes, add entries into the PHI node for the actual
  // parameters passed into the tail-recursive call.
  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
    ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);

  // If we are introducing an accumulator variable to eliminate the recursion,
  // do so now.  Note that we _know_ that no subsequent tail recursion
  // eliminations will happen on this function because of the way the
  // accumulator recursion predicate is set up.
  //
  if (AccumulatorRecursionEliminationInitVal) {
    Instruction *AccRecInstr = AccumulatorRecursionInstr;
    // Start by inserting a new PHI node for the accumulator.
    pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
    PHINode *AccPN = PHINode::Create(
        AccumulatorRecursionEliminationInitVal->getType(),
        std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front());

    // Loop over all of the predecessors of the tail recursion block.  For the
    // real entry into the function we seed the PHI with the initial value,
    // computed earlier.  For any other existing branches to this block (due to
    // other tail recursions eliminated) the accumulator is not modified.
    // Because we haven't added the branch in the current block to OldEntry yet,
    // it will not show up as a predecessor.
    for (pred_iterator PI = PB; PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (P == &F->getEntryBlock())
        AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
      else
        AccPN->addIncoming(AccPN, P);
    }

    if (AccRecInstr) {
      // Add an incoming argument for the current block, which is computed by
      // our associative and commutative accumulator instruction.
      AccPN->addIncoming(AccRecInstr, BB);

      // Next, rewrite the accumulator recursion instruction so that it does not
      // use the result of the call anymore, instead, use the PHI node we just
      // inserted.
      // (Operand index is 0 or 1: whichever operand was the call result.)
      AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
    } else {
      // Add an incoming argument for the current block, which is just the
      // constant returned by the current return instruction.
      AccPN->addIncoming(Ret->getReturnValue(), BB);
    }

    // Finally, rewrite any return instructions in the program to return the PHI
    // node instead of the "initval" that they do currently.  This loop will
    // actually rewrite the return value we are destroying, but that's ok.
    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
      if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
        RI->setOperand(0, AccPN);
    ++NumAccumAdded;
  }

  // Now that all of the PHI nodes are in place, remove the call and
  // ret instructions, replacing them with an unconditional branch.
  BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
  NewBI->setDebugLoc(CI->getDebugLoc());

  BB->getInstList().erase(Ret);  // Remove return.
  BB->getInstList().erase(CI);   // Remove call.
  ++NumEliminated;
  return true;
}
793 | ||||
794 | bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB, | |||
795 | ReturnInst *Ret, BasicBlock *&OldEntry, | |||
796 | bool &TailCallsAreMarkedTail, | |||
797 | SmallVectorImpl<PHINode *> &ArgumentPHIs, | |||
798 | bool CannotTailCallElimCallsMarkedTail) { | |||
799 | bool Change = false; | |||
800 | ||||
801 | // If the return block contains nothing but the return and PHI's, | |||
802 | // there might be an opportunity to duplicate the return in its | |||
803 | // predecessors and perform TRC there. Look for predecessors that end | |||
804 | // in unconditional branch and recursive call(s). | |||
805 | SmallVector<BranchInst*, 8> UncondBranchPreds; | |||
806 | for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { | |||
807 | BasicBlock *Pred = *PI; | |||
808 | TerminatorInst *PTI = Pred->getTerminator(); | |||
809 | if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) | |||
810 | if (BI->isUnconditional()) | |||
811 | UncondBranchPreds.push_back(BI); | |||
812 | } | |||
813 | ||||
814 | while (!UncondBranchPreds.empty()) { | |||
815 | BranchInst *BI = UncondBranchPreds.pop_back_val(); | |||
816 | BasicBlock *Pred = BI->getParent(); | |||
817 | if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){ | |||
818 | DEBUG(dbgs() << "FOLDING: " << *BBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("tailcallelim")) { dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred; } } while (0) | |||
819 | << "INTO UNCOND BRANCH PRED: " << *Pred)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("tailcallelim")) { dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred; } } while (0); | |||
820 | ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred); | |||
821 | ||||
822 | // Cleanup: if all predecessors of BB have been eliminated by | |||
823 | // FoldReturnIntoUncondBranch, delete it. It is important to empty it, | |||
824 | // because the ret instruction in there is still using a value which | |||
825 | // EliminateRecursiveTailCall will attempt to remove. | |||
826 | if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) | |||
827 | BB->eraseFromParent(); | |||
828 | ||||
829 | EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, | |||
830 | ArgumentPHIs, | |||
831 | CannotTailCallElimCallsMarkedTail); | |||
832 | ++NumRetDuped; | |||
833 | Change = true; | |||
834 | } | |||
835 | } | |||
836 | ||||
837 | return Change; | |||
838 | } | |||
839 | ||||
840 | bool | |||
841 | TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, | |||
842 | bool &TailCallsAreMarkedTail, | |||
843 | SmallVectorImpl<PHINode *> &ArgumentPHIs, | |||
844 | bool CannotTailCallElimCallsMarkedTail) { | |||
845 | CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); | |||
846 | if (!CI) | |||
847 | return false; | |||
848 | ||||
849 | return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, | |||
850 | ArgumentPHIs, | |||
851 | CannotTailCallElimCallsMarkedTail); | |||
852 | } |