Bug Summary

File: lib/Transforms/Scalar/TailRecursionElimination.cpp
Warning: line 476, column 14
Called C++ object pointer is null

Annotated Source Code

1//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file transforms calls of the current function (self recursion) followed
11// by a return instruction with a branch to the entry of the function, creating
12// a loop. This pass also implements the following extensions to the basic
13// algorithm:
14//
15// 1. Trivial instructions between the call and return do not prevent the
16// transformation from taking place, though currently the analysis cannot
17// support moving any really useful instructions (only dead ones).
18// 2. This pass transforms functions that are prevented from being tail
19// recursive by an associative and commutative expression to use an
20// accumulator variable, thus compiling the typical naive factorial or
21// 'fib' implementation into efficient code.
22// 3. TRE is performed if the function returns void, if the return
23// returns the result returned by the call, or if the function returns a
24// run-time constant on all exits from the function. It is possible, though
25// unlikely, that the return returns something else (like constant 0), and
26// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
27// the function return the exact same value.
28// 4. If it can prove that callees do not access their caller stack frame,
29// they are marked as eligible for tail call elimination (by the code
30// generator).
31//
32// There are several improvements that could be made:
33//
34// 1. If the function has any alloca instructions, these instructions will be
35// moved out of the entry block of the function, causing them to be
36// evaluated each time through the tail recursion. Safely keeping allocas
37// in the entry block requires analysis to prove that the tail-called
38// function does not read or write the stack object.
39// 2. Tail recursion is only performed if the call immediately precedes the
40// return instruction. It's possible that there could be a jump between
41// the call and the return.
42// 3. There can be intervening operations between the call and the return that
43// prevent the TRE from occurring. For example, there could be GEP's and
44// stores to memory that will not be read or written by the call. This
45// requires some substantial analysis (such as with DSA) to prove safe to
46// move ahead of the call, but doing so could allow many more TREs to be
47// performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
48// 4. The algorithm we use to detect if callees access their caller stack
49// frames is very primitive.
50//
51//===----------------------------------------------------------------------===//
52
53#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
54#include "llvm/Transforms/Scalar.h"
55#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/SmallPtrSet.h"
57#include "llvm/ADT/Statistic.h"
58#include "llvm/Analysis/GlobalsModRef.h"
59#include "llvm/Analysis/CFG.h"
60#include "llvm/Analysis/CaptureTracking.h"
61#include "llvm/Analysis/InlineCost.h"
62#include "llvm/Analysis/InstructionSimplify.h"
63#include "llvm/Analysis/Loads.h"
64#include "llvm/Analysis/TargetTransformInfo.h"
65#include "llvm/IR/CFG.h"
66#include "llvm/IR/CallSite.h"
67#include "llvm/IR/Constants.h"
68#include "llvm/IR/DataLayout.h"
69#include "llvm/IR/DerivedTypes.h"
70#include "llvm/IR/DiagnosticInfo.h"
71#include "llvm/IR/Function.h"
72#include "llvm/IR/Instructions.h"
73#include "llvm/IR/IntrinsicInst.h"
74#include "llvm/IR/Module.h"
75#include "llvm/IR/ValueHandle.h"
76#include "llvm/Pass.h"
77#include "llvm/Support/Debug.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Transforms/Utils/BasicBlockUtils.h"
80#include "llvm/Transforms/Utils/Local.h"
81using namespace llvm;
82
83#define DEBUG_TYPE"tailcallelim" "tailcallelim"
84
85STATISTIC(NumEliminated, "Number of tail calls removed")static llvm::Statistic NumEliminated = {"tailcallelim", "NumEliminated"
, "Number of tail calls removed", {0}, false}
;
86STATISTIC(NumRetDuped, "Number of return duplicated")static llvm::Statistic NumRetDuped = {"tailcallelim", "NumRetDuped"
, "Number of return duplicated", {0}, false}
;
87STATISTIC(NumAccumAdded, "Number of accumulators introduced")static llvm::Statistic NumAccumAdded = {"tailcallelim", "NumAccumAdded"
, "Number of accumulators introduced", {0}, false}
;
88
89/// \brief Scan the specified function for alloca instructions.
90/// If it contains any dynamic allocas, returns false.
91static bool canTRE(Function &F) {
92 // Because of PR962, we don't TRE dynamic allocas.
93 for (auto &BB : F) {
94 for (auto &I : BB) {
95 if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
96 if (!AI->isStaticAlloca())
97 return false;
98 }
99 }
100 }
101
102 return true;
103}
104
105namespace {
106struct AllocaDerivedValueTracker {
107 // Start at a root value and walk its use-def chain to mark calls that use the
108 // value or a derived value in AllocaUsers, and places where it may escape in
109 // EscapePoints.
110 void walk(Value *Root) {
111 SmallVector<Use *, 32> Worklist;
112 SmallPtrSet<Use *, 32> Visited;
113
114 auto AddUsesToWorklist = [&](Value *V) {
115 for (auto &U : V->uses()) {
116 if (!Visited.insert(&U).second)
117 continue;
118 Worklist.push_back(&U);
119 }
120 };
121
122 AddUsesToWorklist(Root);
123
124 while (!Worklist.empty()) {
125 Use *U = Worklist.pop_back_val();
126 Instruction *I = cast<Instruction>(U->getUser());
127
128 switch (I->getOpcode()) {
129 case Instruction::Call:
130 case Instruction::Invoke: {
131 CallSite CS(I);
132 bool IsNocapture =
133 CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
134 callUsesLocalStack(CS, IsNocapture);
135 if (IsNocapture) {
136 // If the alloca-derived argument is passed in as nocapture, then it
137 // can't propagate to the call's return. That would be capturing.
138 continue;
139 }
140 break;
141 }
142 case Instruction::Load: {
143 // The result of a load is not alloca-derived (unless an alloca has
144 // otherwise escaped, but this is a local analysis).
145 continue;
146 }
147 case Instruction::Store: {
148 if (U->getOperandNo() == 0)
149 EscapePoints.insert(I);
150 continue; // Stores have no users to analyze.
151 }
152 case Instruction::BitCast:
153 case Instruction::GetElementPtr:
154 case Instruction::PHI:
155 case Instruction::Select:
156 case Instruction::AddrSpaceCast:
157 break;
158 default:
159 EscapePoints.insert(I);
160 break;
161 }
162
163 AddUsesToWorklist(I);
164 }
165 }
166
167 void callUsesLocalStack(CallSite CS, bool IsNocapture) {
168 // Add it to the list of alloca users.
169 AllocaUsers.insert(CS.getInstruction());
170
171 // If it's nocapture then it can't capture this alloca.
172 if (IsNocapture)
173 return;
174
175 // If it can write to memory, it can leak the alloca value.
176 if (!CS.onlyReadsMemory())
177 EscapePoints.insert(CS.getInstruction());
178 }
179
180 SmallPtrSet<Instruction *, 32> AllocaUsers;
181 SmallPtrSet<Instruction *, 32> EscapePoints;
182};
183}
184
185static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
186 if (F.callsFunctionThatReturnsTwice())
187 return false;
188 AllCallsAreTailCalls = true;
189
190 // The local stack holds all alloca instructions and all byval arguments.
191 AllocaDerivedValueTracker Tracker;
192 for (Argument &Arg : F.args()) {
193 if (Arg.hasByValAttr())
194 Tracker.walk(&Arg);
195 }
196 for (auto &BB : F) {
197 for (auto &I : BB)
198 if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
199 Tracker.walk(AI);
200 }
201
202 bool Modified = false;
203
204 // Track whether a block is reachable after an alloca has escaped. Blocks that
205 // contain the escaping instruction will be marked as being visited without an
206 // escaped alloca, since that is how the block began.
207 enum VisitType {
208 UNVISITED,
209 UNESCAPED,
210 ESCAPED
211 };
212 DenseMap<BasicBlock *, VisitType> Visited;
213
214 // We propagate the fact that an alloca has escaped from block to successor.
215 // Visit the blocks that are propagating the escapedness first. To do this, we
216 // maintain two worklists.
217 SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
218
219 // We may enter a block and visit it thinking that no alloca has escaped yet,
220 // then see an escape point and go back around a loop edge and come back to
221 // the same block twice. Because of this, we defer setting tail on calls when
222 // we first encounter them in a block. Every entry in this list does not
223 // statically use an alloca via use-def chain analysis, but may find an alloca
224 // through other means if the block turns out to be reachable after an escape
225 // point.
226 SmallVector<CallInst *, 32> DeferredTails;
227
228 BasicBlock *BB = &F.getEntryBlock();
229 VisitType Escaped = UNESCAPED;
230 do {
231 for (auto &I : *BB) {
232 if (Tracker.EscapePoints.count(&I))
233 Escaped = ESCAPED;
234
235 CallInst *CI = dyn_cast<CallInst>(&I);
236 if (!CI || CI->isTailCall())
237 continue;
238
239 bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();
240
241 if (!IsNoTail && CI->doesNotAccessMemory()) {
242 // A call to a readnone function whose arguments are all things computed
243 // outside this function can be marked tail. Even if you stored the
244 // alloca address into a global, a readnone function can't load the
245 // global anyhow.
246 //
247 // Note that this runs whether we know an alloca has escaped or not. If
248 // it has, then we can't trust Tracker.AllocaUsers to be accurate.
249 bool SafeToTail = true;
250 for (auto &Arg : CI->arg_operands()) {
251 if (isa<Constant>(Arg.getUser()))
252 continue;
253 if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
254 if (!A->hasByValAttr())
255 continue;
256 SafeToTail = false;
257 break;
258 }
259 if (SafeToTail) {
260 emitOptimizationRemark(
261 F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
262 "marked this readnone call a tail call candidate");
263 CI->setTailCall();
264 Modified = true;
265 continue;
266 }
267 }
268
269 if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
270 DeferredTails.push_back(CI);
271 } else {
272 AllCallsAreTailCalls = false;
273 }
274 }
275
276 for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
277 auto &State = Visited[SuccBB];
278 if (State < Escaped) {
279 State = Escaped;
280 if (State == ESCAPED)
281 WorklistEscaped.push_back(SuccBB);
282 else
283 WorklistUnescaped.push_back(SuccBB);
284 }
285 }
286
287 if (!WorklistEscaped.empty()) {
288 BB = WorklistEscaped.pop_back_val();
289 Escaped = ESCAPED;
290 } else {
291 BB = nullptr;
292 while (!WorklistUnescaped.empty()) {
293 auto *NextBB = WorklistUnescaped.pop_back_val();
294 if (Visited[NextBB] == UNESCAPED) {
295 BB = NextBB;
296 Escaped = UNESCAPED;
297 break;
298 }
299 }
300 }
301 } while (BB);
302
303 for (CallInst *CI : DeferredTails) {
304 if (Visited[CI->getParent()] != ESCAPED) {
305 // If the escape point was part way through the block, calls after the
306 // escape point wouldn't have been put into DeferredTails.
307 emitOptimizationRemark(F.getContext(), "tailcallelim", F,
308 CI->getDebugLoc(),
309 "marked this call a tail call candidate");
310 CI->setTailCall();
311 Modified = true;
312 } else {
313 AllCallsAreTailCalls = false;
314 }
315 }
316
317 return Modified;
318}
319
320/// Return true if it is safe to move the specified
321/// instruction from after the call to before the call, assuming that all
322/// instructions between the call and this instruction are movable.
323///
324static bool canMoveAboveCall(Instruction *I, CallInst *CI) {
325 // FIXME: We can move load/store/call/free instructions above the call if the
326 // call does not mod/ref the memory location being processed.
327 if (I->mayHaveSideEffects()) // This also handles volatile loads.
328 return false;
329
330 if (LoadInst *L = dyn_cast<LoadInst>(I)) {
331 // Loads may always be moved above calls without side effects.
332 if (CI->mayHaveSideEffects()) {
333 // Non-volatile loads may be moved above a call with side effects if it
334 // does not write to memory and the load provably won't trap.
335 // FIXME: Writes to memory only matter if they may alias the pointer
336 // being loaded from.
337 const DataLayout &DL = L->getModule()->getDataLayout();
338 if (CI->mayWriteToMemory() ||
339 !isSafeToLoadUnconditionally(L->getPointerOperand(),
340 L->getAlignment(), DL, L))
341 return false;
342 }
343 }
344
345 // Otherwise, if this is a side-effect free instruction, check to make sure
346 // that it does not use the return value of the call. If it doesn't use the
347 // return value of the call, it must only use things that are defined before
348 // the call, or movable instructions between the call and the instruction
349 // itself.
350 return !is_contained(I->operands(), CI);
351}
352
353/// Return true if the specified value is the same when the return would exit
354/// as it was when the initial iteration of the recursive function was executed.
355///
356/// We currently handle static constants and arguments that are not modified as
357/// part of the recursion.
358static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
359 if (isa<Constant>(V)) return true; // Static constants are always dyn consts
360
361 // Check to see if this is an immutable argument, if so, the value
362 // will be available to initialize the accumulator.
363 if (Argument *Arg = dyn_cast<Argument>(V)) {
364 // Figure out which argument number this is...
365 unsigned ArgNo = 0;
366 Function *F = CI->getParent()->getParent();
367 for (Function::arg_iterator AI = F->arg_begin(); &*AI != Arg; ++AI)
368 ++ArgNo;
369
370 // If we are passing this argument into call as the corresponding
371 // argument operand, then the argument is dynamically constant.
372 // Otherwise, we cannot transform this function safely.
373 if (CI->getArgOperand(ArgNo) == Arg)
374 return true;
375 }
376
377 // Switch cases are always constant integers. If the value is being switched
378 // on and the return is only reachable from one of its cases, it's
379 // effectively constant.
380 if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
381 if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
382 if (SI->getCondition() == V)
383 return SI->getDefaultDest() != RI->getParent();
384
385 // Not a constant or immutable argument, we can't safely transform.
386 return false;
387}
388
389/// Check to see if the function containing the specified tail call consistently
390/// returns the same runtime-constant value at all exit points except for
391/// IgnoreRI. If so, return the returned value.
392static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
393 Function *F = CI->getParent()->getParent();
394 Value *ReturnedValue = nullptr;
395
396 for (BasicBlock &BBI : *F) {
397 ReturnInst *RI = dyn_cast<ReturnInst>(BBI.getTerminator());
398 if (RI == nullptr || RI == IgnoreRI) continue;
399
400 // We can only perform this transformation if the value returned is
401 // evaluatable at the start of the initial invocation of the function,
402 // instead of at the end of the evaluation.
403 //
404 Value *RetOp = RI->getOperand(0);
405 if (!isDynamicConstant(RetOp, CI, RI))
406 return nullptr;
407
408 if (ReturnedValue && RetOp != ReturnedValue)
409 return nullptr; // Cannot transform if differing values are returned.
410 ReturnedValue = RetOp;
411 }
412 return ReturnedValue;
413}
414
415/// If the specified instruction can be transformed using accumulator recursion
416/// elimination, return the constant which is the start of the accumulator
417/// value. Otherwise return null.
418static Value *canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
419 if (!I->isAssociative() || !I->isCommutative()) return nullptr;
420 assert(I->getNumOperands() == 2 &&((I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"
) ? static_cast<void> (0) : __assert_fail ("I->getNumOperands() == 2 && \"Associative/commutative operations should have 2 args!\""
, "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Transforms/Scalar/TailRecursionElimination.cpp"
, 421, __PRETTY_FUNCTION__))
421 "Associative/commutative operations should have 2 args!")((I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"
) ? static_cast<void> (0) : __assert_fail ("I->getNumOperands() == 2 && \"Associative/commutative operations should have 2 args!\""
, "/tmp/buildd/llvm-toolchain-snapshot-4.0~svn290870/lib/Transforms/Scalar/TailRecursionElimination.cpp"
, 421, __PRETTY_FUNCTION__))
;
422
423 // Exactly one operand should be the result of the call instruction.
424 if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
425 (I->getOperand(0) != CI && I->getOperand(1) != CI))
426 return nullptr;
427
428 // The only user of this instruction we allow is a single return instruction.
429 if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
430 return nullptr;
431
432 // Ok, now we have to check all of the other return instructions in this
433 // function. If they return non-constants or differing values, then we cannot
434 // transform the function safely.
435 return getCommonReturnValue(cast<ReturnInst>(I->user_back()), CI);
436}
437
438static Instruction *firstNonDbg(BasicBlock::iterator I) {
439 while (isa<DbgInfoIntrinsic>(I))
440 ++I;
441 return &*I;
442}
443
444static CallInst *findTRECandidate(Instruction *TI,
445 bool CannotTailCallElimCallsMarkedTail,
446 const TargetTransformInfo *TTI) {
447 BasicBlock *BB = TI->getParent();
448 Function *F = BB->getParent();
20
'F' initialized here
449
450 if (&BB->front() == TI) // Make sure there is something before the terminator.
21
Assuming the condition is false
22
Taking false branch
451 return nullptr;
452
453 // Scan backwards from the return, checking to see if there is a tail call in
454 // this block. If so, set CI to it.
455 CallInst *CI = nullptr;
456 BasicBlock::iterator BBI(TI);
457 while (true) {
23
Loop condition is true. Entering loop body
458 CI = dyn_cast<CallInst>(BBI);
459 if (CI && CI->getCalledFunction() == F)
24
Assuming 'CI' is non-null
25
Assuming the condition is true
26
Assuming pointer value is null
27
Taking true branch
460 break;
28
Execution continues on line 469
461
462 if (BBI == BB->begin())
463 return nullptr; // Didn't find a potential tail call.
464 --BBI;
465 }
466
467 // If this call is marked as a tail call, and if there are dynamic allocas in
468 // the function, we cannot perform this optimization.
469 if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
29
Taking false branch
470 return nullptr;
471
472 // As a special case, detect code like this:
473 // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
474 // and disable this xform in this case, because the code generator will
475 // lower the call to fabs into inline code.
476 if (BB == &F->getEntryBlock() &&
30
Called C++ object pointer is null
477 firstNonDbg(BB->front().getIterator()) == CI &&
478 firstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() &&
479 !TTI->isLoweredToCall(CI->getCalledFunction())) {
480 // A single-block function with just a call and a return. Check that
481 // the arguments match.
482 CallSite::arg_iterator I = CallSite(CI).arg_begin(),
483 E = CallSite(CI).arg_end();
484 Function::arg_iterator FI = F->arg_begin(),
485 FE = F->arg_end();
486 for (; I != E && FI != FE; ++I, ++FI)
487 if (*I != &*FI) break;
488 if (I == E && FI == FE)
489 return nullptr;
490 }
491
492 return CI;
493}
494
495static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
496 BasicBlock *&OldEntry,
497 bool &TailCallsAreMarkedTail,
498 SmallVectorImpl<PHINode *> &ArgumentPHIs,
499 bool CannotTailCallElimCallsMarkedTail) {
500 // If we are introducing accumulator recursion to eliminate operations after
501 // the call instruction that are both associative and commutative, the initial
502 // value for the accumulator is placed in this variable. If this value is set
503 // then we actually perform accumulator recursion elimination instead of
504 // simple tail recursion elimination. If the operation is an LLVM instruction
505 // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then
506 // we are handling the case when the return instruction returns a constant C
507 // which is different to the constant returned by other return instructions
508 // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
509 // special case of accumulator recursion, the operation being "return C".
510 Value *AccumulatorRecursionEliminationInitVal = nullptr;
511 Instruction *AccumulatorRecursionInstr = nullptr;
512
513 // Ok, we found a potential tail call. We can currently only transform the
514 // tail call if all of the instructions between the call and the return are
515 // movable to above the call itself, leaving the call next to the return.
516 // Check that this is the case now.
517 BasicBlock::iterator BBI(CI);
518 for (++BBI; &*BBI != Ret; ++BBI) {
519 if (canMoveAboveCall(&*BBI, CI)) continue;
520
521 // If we can't move the instruction above the call, it might be because it
522 // is an associative and commutative operation that could be transformed
523 // using accumulator recursion elimination. Check to see if this is the
524 // case, and if so, remember the initial accumulator value for later.
525 if ((AccumulatorRecursionEliminationInitVal =
526 canTransformAccumulatorRecursion(&*BBI, CI))) {
527 // Yes, this is accumulator recursion. Remember which instruction
528 // accumulates.
529 AccumulatorRecursionInstr = &*BBI;
530 } else {
531 return false; // Otherwise, we cannot eliminate the tail recursion!
532 }
533 }
534
535 // We can only transform call/return pairs that either ignore the return value
536 // of the call and return void, ignore the value of the call and return a
537 // constant, return the value returned by the tail call, or that are being
538 // accumulator recursion variable eliminated.
539 if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
540 !isa<UndefValue>(Ret->getReturnValue()) &&
541 AccumulatorRecursionEliminationInitVal == nullptr &&
542 !getCommonReturnValue(nullptr, CI)) {
543 // One case remains that we are able to handle: the current return
544 // instruction returns a constant, and all other return instructions
545 // return a different constant.
546 if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
547 return false; // Current return instruction does not return a constant.
548 // Check that all other return instructions return a common constant. If
549 // so, record it in AccumulatorRecursionEliminationInitVal.
550 AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
551 if (!AccumulatorRecursionEliminationInitVal)
552 return false;
553 }
554
555 BasicBlock *BB = Ret->getParent();
556 Function *F = BB->getParent();
557
558 emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
559 "transforming tail recursion to loop");
560
561 // OK! We can transform this tail call. If this is the first one found,
562 // create the new entry block, allowing us to branch back to the old entry.
563 if (!OldEntry) {
564 OldEntry = &F->getEntryBlock();
565 BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
566 NewEntry->takeName(OldEntry);
567 OldEntry->setName("tailrecurse");
568 BranchInst::Create(OldEntry, NewEntry);
569
570 // If this tail call is marked 'tail' and if there are any allocas in the
571 // entry block, move them up to the new entry block.
572 TailCallsAreMarkedTail = CI->isTailCall();
573 if (TailCallsAreMarkedTail)
574 // Move all fixed sized allocas from OldEntry to NewEntry.
575 for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
576 NEBI = NewEntry->begin(); OEBI != E; )
577 if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
578 if (isa<ConstantInt>(AI->getArraySize()))
579 AI->moveBefore(&*NEBI);
580
581 // Now that we have created a new block, which jumps to the entry
582 // block, insert a PHI node for each argument of the function.
583 // For now, we initialize each PHI to only have the real arguments
584 // which are passed in.
585 Instruction *InsertPos = &OldEntry->front();
586 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
587 I != E; ++I) {
588 PHINode *PN = PHINode::Create(I->getType(), 2,
589 I->getName() + ".tr", InsertPos);
590 I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
591 PN->addIncoming(&*I, NewEntry);
592 ArgumentPHIs.push_back(PN);
593 }
594 }
595
596 // If this function has self recursive calls in the tail position where some
597 // are marked tail and some are not, only transform one flavor or another. We
598 // have to choose whether we move allocas in the entry block to the new entry
599 // block or not, so we can't make a good choice for both. NOTE: We could do
600 // slightly better here in the case that the function has no entry block
601 // allocas.
602 if (TailCallsAreMarkedTail && !CI->isTailCall())
603 return false;
604
605 // Ok, now that we know we have a pseudo-entry block WITH all of the
606 // required PHI nodes, add entries into the PHI node for the actual
607 // parameters passed into the tail-recursive call.
608 for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
609 ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
610
611 // If we are introducing an accumulator variable to eliminate the recursion,
612 // do so now. Note that we _know_ that no subsequent tail recursion
613 // eliminations will happen on this function because of the way the
614 // accumulator recursion predicate is set up.
615 //
616 if (AccumulatorRecursionEliminationInitVal) {
617 Instruction *AccRecInstr = AccumulatorRecursionInstr;
618 // Start by inserting a new PHI node for the accumulator.
619 pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
620 PHINode *AccPN = PHINode::Create(
621 AccumulatorRecursionEliminationInitVal->getType(),
622 std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front());
623
624 // Loop over all of the predecessors of the tail recursion block. For the
625 // real entry into the function we seed the PHI with the initial value,
626 // computed earlier. For any other existing branches to this block (due to
627 // other tail recursions eliminated) the accumulator is not modified.
628 // Because we haven't added the branch in the current block to OldEntry yet,
629 // it will not show up as a predecessor.
630 for (pred_iterator PI = PB; PI != PE; ++PI) {
631 BasicBlock *P = *PI;
632 if (P == &F->getEntryBlock())
633 AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
634 else
635 AccPN->addIncoming(AccPN, P);
636 }
637
638 if (AccRecInstr) {
639 // Add an incoming argument for the current block, which is computed by
640 // our associative and commutative accumulator instruction.
641 AccPN->addIncoming(AccRecInstr, BB);
642
643 // Next, rewrite the accumulator recursion instruction so that it does not
644 // use the result of the call anymore, instead, use the PHI node we just
645 // inserted.
646 AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
647 } else {
648 // Add an incoming argument for the current block, which is just the
649 // constant returned by the current return instruction.
650 AccPN->addIncoming(Ret->getReturnValue(), BB);
651 }
652
653 // Finally, rewrite any return instructions in the program to return the PHI
654 // node instead of the "initval" that they do currently. This loop will
655 // actually rewrite the return value we are destroying, but that's ok.
656 for (BasicBlock &BBI : *F)
657 if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI.getTerminator()))
658 RI->setOperand(0, AccPN);
659 ++NumAccumAdded;
660 }
661
662 // Now that all of the PHI nodes are in place, remove the call and
663 // ret instructions, replacing them with an unconditional branch.
664 BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
665 NewBI->setDebugLoc(CI->getDebugLoc());
666
667 BB->getInstList().erase(Ret); // Remove return.
668 BB->getInstList().erase(CI); // Remove call.
669 ++NumEliminated;
670 return true;
671}
672
673static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret,
674 BasicBlock *&OldEntry,
675 bool &TailCallsAreMarkedTail,
676 SmallVectorImpl<PHINode *> &ArgumentPHIs,
677 bool CannotTailCallElimCallsMarkedTail,
678 const TargetTransformInfo *TTI) {
679 bool Change = false;
680
681 // If the return block contains nothing but the return and PHI's,
682 // there might be an opportunity to duplicate the return in its
683 // predecessors and perform TRC there. Look for predecessors that end
684 // in unconditional branch and recursive call(s).
685 SmallVector<BranchInst*, 8> UncondBranchPreds;
686 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
687 BasicBlock *Pred = *PI;
688 TerminatorInst *PTI = Pred->getTerminator();
689 if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
690 if (BI->isUnconditional())
691 UncondBranchPreds.push_back(BI);
692 }
693
694 while (!UncondBranchPreds.empty()) {
695 BranchInst *BI = UncondBranchPreds.pop_back_val();
696 BasicBlock *Pred = BI->getParent();
697 if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){
698 DEBUG(dbgs() << "FOLDING: " << *BBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("tailcallelim")) { dbgs() << "FOLDING: " << *BB <<
"INTO UNCOND BRANCH PRED: " << *Pred; } } while (false
)
699 << "INTO UNCOND BRANCH PRED: " << *Pred)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("tailcallelim")) { dbgs() << "FOLDING: " << *BB <<
"INTO UNCOND BRANCH PRED: " << *Pred; } } while (false
)
;
700 ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
701
702 // Cleanup: if all predecessors of BB have been eliminated by
703 // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
704 // because the ret instruction in there is still using a value which
705 // eliminateRecursiveTailCall will attempt to remove.
706 if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
707 BB->eraseFromParent();
708
709 eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
710 ArgumentPHIs,
711 CannotTailCallElimCallsMarkedTail);
712 ++NumRetDuped;
713 Change = true;
714 }
715 }
716
717 return Change;
718}
719
720static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
721 bool &TailCallsAreMarkedTail,
722 SmallVectorImpl<PHINode *> &ArgumentPHIs,
723 bool CannotTailCallElimCallsMarkedTail,
724 const TargetTransformInfo *TTI) {
725 CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
19
Calling 'findTRECandidate'
726 if (!CI)
727 return false;
728
729 return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
730 ArgumentPHIs,
731 CannotTailCallElimCallsMarkedTail);
732}
733
734static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) {
735 if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
4
Assuming the condition is false
5
Taking false branch
736 return false;
737
738 bool MadeChange = false;
739 bool AllCallsAreTailCalls = false;
740 MadeChange |= markTails(F, AllCallsAreTailCalls);
741 if (!AllCallsAreTailCalls)
6
Assuming 'AllCallsAreTailCalls' is not equal to 0
7
Taking false branch
742 return MadeChange;
743
744 // If this function is a varargs function, we won't be able to PHI the args
745 // right, so don't even try to convert it...
746 if (F.getFunctionType()->isVarArg())
8
Taking false branch
747 return false;
748
749 BasicBlock *OldEntry = nullptr;
750 bool TailCallsAreMarkedTail = false;
751 SmallVector<PHINode*, 8> ArgumentPHIs;
752
753 // If false, we cannot perform TRE on tail calls marked with the 'tail'
754 // attribute, because doing so would cause the stack size to increase (real
755 // TRE would deallocate variable sized allocas, TRE doesn't).
756 bool CanTRETailMarkedCall = canTRE(F);
757
758 // Change any tail recursive calls to loops.
759 //
760 // FIXME: The code generator produces really bad code when an 'escaping
761 // alloca' is changed from being a static alloca to being a dynamic alloca.
762 // Until this is resolved, disable this transformation if that would ever
763 // happen. This bug is PR962.
764 for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
9
Loop condition is true. Entering loop body
11
Loop condition is true. Entering loop body
13
Loop condition is true. Entering loop body
15
Loop condition is true. Entering loop body
765 BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB.
766 if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
10
Taking false branch
12
Taking false branch
14
Taking false branch
16
Assuming 'Ret' is non-null
17
Taking true branch
767 bool Change =
768 processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
18
Calling 'processReturningBlock'
769 ArgumentPHIs, !CanTRETailMarkedCall, TTI);
770 if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
771 Change =
772 foldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail,
773 ArgumentPHIs, !CanTRETailMarkedCall, TTI);
774 MadeChange |= Change;
775 }
776 }
777
778 // If we eliminated any tail recursions, it's possible that we inserted some
779 // silly PHI nodes which just merge an initial value (the incoming operand)
780 // with themselves. Check to see if we did and clean up our mess if so. This
781 // occurs when a function passes an argument straight through to its tail
782 // call.
783 for (PHINode *PN : ArgumentPHIs) {
784 // If the PHI Node is a dynamic constant, replace it with the value it is.
785 if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
786 PN->replaceAllUsesWith(PNV);
787 PN->eraseFromParent();
788 }
789 }
790
791 return MadeChange;
792}
793
794namespace {
795struct TailCallElim : public FunctionPass {
796 static char ID; // Pass identification, replacement for typeid
797 TailCallElim() : FunctionPass(ID) {
798 initializeTailCallElimPass(*PassRegistry::getPassRegistry());
799 }
800
801 void getAnalysisUsage(AnalysisUsage &AU) const override {
802 AU.addRequired<TargetTransformInfoWrapperPass>();
803 AU.addPreserved<GlobalsAAWrapperPass>();
804 }
805
806 bool runOnFunction(Function &F) override {
807 if (skipFunction(F))
1
Assuming the condition is false
2
Taking false branch
808 return false;
809
810 return eliminateTailRecursion(
3
Calling 'eliminateTailRecursion'
811 F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F));
812 }
813};
814}
815
816char TailCallElim::ID = 0;
817INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",static void *initializeTailCallElimPassOnce(PassRegistry &
Registry) {
818 false, false)static void *initializeTailCallElimPassOnce(PassRegistry &
Registry) {
819INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
820INITIALIZE_PASS_END(TailCallElim, "tailcallelim", "Tail Call Elimination",PassInfo *PI = new PassInfo( "Tail Call Elimination", "tailcallelim"
, &TailCallElim::ID, PassInfo::NormalCtor_t(callDefaultCtor
<TailCallElim>), false, false); Registry.registerPass(*
PI, true); return PI; } static once_flag InitializeTailCallElimPassFlag
; void llvm::initializeTailCallElimPass(PassRegistry &Registry
) { llvm::call_once(InitializeTailCallElimPassFlag, initializeTailCallElimPassOnce
, std::ref(Registry)); }
821 false, false)PassInfo *PI = new PassInfo( "Tail Call Elimination", "tailcallelim"
, &TailCallElim::ID, PassInfo::NormalCtor_t(callDefaultCtor
<TailCallElim>), false, false); Registry.registerPass(*
PI, true); return PI; } static once_flag InitializeTailCallElimPassFlag
; void llvm::initializeTailCallElimPass(PassRegistry &Registry
) { llvm::call_once(InitializeTailCallElimPassFlag, initializeTailCallElimPassOnce
, std::ref(Registry)); }
822
823// Public interface to the TailCallElimination pass
824FunctionPass *llvm::createTailCallEliminationPass() {
825 return new TailCallElim();
826}
827
828PreservedAnalyses TailCallElimPass::run(Function &F,
829 FunctionAnalysisManager &AM) {
830
831 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
832
833 bool Changed = eliminateTailRecursion(F, &TTI);
834
835 if (!Changed)
836 return PreservedAnalyses::all();
837 PreservedAnalyses PA;
838 PA.preserve<GlobalsAA>();
839 return PA;
840}