InlineFunction.cpp
1 //===- InlineFunction.cpp - Code to perform function inlining -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements inlining of a function into a call site, resolving
10 // parameters and the return value as appropriate.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/None.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/IR/Argument.h"
35 #include "llvm/IR/BasicBlock.h"
36 #include "llvm/IR/CFG.h"
37 #include "llvm/IR/CallSite.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DIBuilder.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugInfoMetadata.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstrTypes.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/IntrinsicInst.h"
52 #include "llvm/IR/Intrinsics.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/MDBuilder.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/IR/Module.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/CommandLine.h"
62 #include "llvm/Support/ErrorHandling.h"
63 #include "llvm/Transforms/Utils/Cloning.h"
64 #include "llvm/Transforms/Utils/ValueMapper.h"
65 #include <algorithm>
66 #include <cassert>
67 #include <cstdint>
68 #include <iterator>
69 #include <limits>
70 #include <string>
71 #include <utility>
72 #include <vector>
73 
74 using namespace llvm;
76 
77 static cl::opt<bool>
78 EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
79  cl::Hidden,
80  cl::desc("Convert noalias attributes to metadata during inlining."));
81 
82 static cl::opt<bool>
83 PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
84  cl::init(true), cl::Hidden,
85  cl::desc("Convert align attributes to assumptions during inlining."));
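// Both flags are cl::Hidden, so they are listed only under -help-hidden, but
// they can still be set explicitly, e.g.
//   opt -enable-noalias-to-md-conversion=false ...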
86 
87 llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
88  AAResults *CalleeAAR,
89  bool InsertLifetime) {
90  return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
91 }
92 
93 namespace {
94 
95  /// A class for recording information about inlining a landing pad.
96  class LandingPadInliningInfo {
97  /// Destination of the invoke's unwind.
98  BasicBlock *OuterResumeDest;
99 
100  /// Destination for the callee's resume.
101  BasicBlock *InnerResumeDest = nullptr;
102 
103  /// LandingPadInst associated with the invoke.
104  LandingPadInst *CallerLPad = nullptr;
105 
106  /// PHI for EH values from landingpad insts.
107  PHINode *InnerEHValuesPHI = nullptr;
108 
109  SmallVector<Value*, 8> UnwindDestPHIValues;
110 
111  public:
112  LandingPadInliningInfo(InvokeInst *II)
113  : OuterResumeDest(II->getUnwindDest()) {
114  // If there are PHI nodes in the unwind destination block, we need to keep
115  // track of which values came into them from the invoke before removing
116  // the edge from this block.
117  BasicBlock *InvokeBB = II->getParent();
118  BasicBlock::iterator I = OuterResumeDest->begin();
119  for (; isa<PHINode>(I); ++I) {
120  // Save the value to use for this edge.
121  PHINode *PHI = cast<PHINode>(I);
122  UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
123  }
124 
125  CallerLPad = cast<LandingPadInst>(I);
126  }
127 
128  /// The outer unwind destination is the target of
129  /// unwind edges introduced for calls within the inlined function.
130  BasicBlock *getOuterResumeDest() const {
131  return OuterResumeDest;
132  }
133 
134  BasicBlock *getInnerResumeDest();
135 
136  LandingPadInst *getLandingPadInst() const { return CallerLPad; }
137 
138  /// Forward the 'resume' instruction to the caller's landing pad block.
139  /// When the landing pad block has only one predecessor, this is
140  /// a simple branch. When there is more than one predecessor, we need to
141  /// split the landing pad block after the landingpad instruction and jump
142  /// to there.
143  void forwardResume(ResumeInst *RI,
144  SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
145 
146  /// Add incoming-PHI values to the unwind destination block for the given
147  /// basic block, using the values for the original invoke's source block.
148  void addIncomingPHIValuesFor(BasicBlock *BB) const {
149  addIncomingPHIValuesForInto(BB, OuterResumeDest);
150  }
151 
152  void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
153  BasicBlock::iterator I = dest->begin();
154  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
155  PHINode *phi = cast<PHINode>(I);
156  phi->addIncoming(UnwindDestPHIValues[i], src);
157  }
158  }
159  };
160 
161 } // end anonymous namespace
162 
163 /// Get or create a target for the branch from ResumeInsts.
164 BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
165  if (InnerResumeDest) return InnerResumeDest;
166 
167  // Split the landing pad.
168  BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
169  InnerResumeDest =
170  OuterResumeDest->splitBasicBlock(SplitPoint,
171  OuterResumeDest->getName() + ".body");
172 
173  // The number of incoming edges we expect to the inner landing pad.
174  const unsigned PHICapacity = 2;
175 
176  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
177  Instruction *InsertPoint = &InnerResumeDest->front();
178  BasicBlock::iterator I = OuterResumeDest->begin();
179  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
180  PHINode *OuterPHI = cast<PHINode>(I);
181  PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
182  OuterPHI->getName() + ".lpad-body",
183  InsertPoint);
184  OuterPHI->replaceAllUsesWith(InnerPHI);
185  InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
186  }
187 
188  // Create a PHI for the exception values.
189  InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
190  "eh.lpad-body", InsertPoint);
191  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
192  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
193 
194  // All done.
195  return InnerResumeDest;
196 }
197 
198 /// Forward the 'resume' instruction to the caller's landing pad block.
199 /// When the landing pad block has only one predecessor, this is a simple
200 /// branch. When there is more than one predecessor, we need to split the
201 /// landing pad block after the landingpad instruction and jump to there.
202 void LandingPadInliningInfo::forwardResume(
203  ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
204  BasicBlock *Dest = getInnerResumeDest();
205  BasicBlock *Src = RI->getParent();
206 
207  BranchInst::Create(Dest, Src);
208 
209  // Update the PHIs in the destination. They were inserted in an order which
210  // makes this work.
211  addIncomingPHIValuesForInto(Src, Dest);
212 
213  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
214  RI->eraseFromParent();
215 }
216 
217 /// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
218 static Value *getParentPad(Value *EHPad) {
219  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
220  return FPI->getParentPad();
221  return cast<CatchSwitchInst>(EHPad)->getParentPad();
222 }
223 
224 using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
225 
226 /// Helper for getUnwindDestToken that does the descendant-ward part of
227 /// the search.
228 static Value *getUnwindDestTokenHelper(Instruction *EHPad,
229  UnwindDestMemoTy &MemoMap) {
230  SmallVector<Instruction *, 8> Worklist(1, EHPad);
231 
232  while (!Worklist.empty()) {
233  Instruction *CurrentPad = Worklist.pop_back_val();
234  // We only put pads on the worklist that aren't in the MemoMap. When
235  // we find an unwind dest for a pad we may update its ancestors, but
236  // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
237  // so they should never get updated while queued on the worklist.
238  assert(!MemoMap.count(CurrentPad));
239  Value *UnwindDestToken = nullptr;
240  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
241  if (CatchSwitch->hasUnwindDest()) {
242  UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
243  } else {
244  // Catchswitch doesn't have a 'nounwind' variant, and one might be
245  // annotated as "unwinds to caller" when really it's nounwind (see
246  // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
247  // parent's unwind dest from this. We can check its catchpads'
248  // descendants, since they might include a cleanuppad with an
249  // "unwinds to caller" cleanupret, which can be trusted.
250  for (auto HI = CatchSwitch->handler_begin(),
251  HE = CatchSwitch->handler_end();
252  HI != HE && !UnwindDestToken; ++HI) {
253  BasicBlock *HandlerBlock = *HI;
254  auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
255  for (User *Child : CatchPad->users()) {
256  // Intentionally ignore invokes here -- since the catchswitch is
257  // marked "unwind to caller", it would be a verifier error if it
258  // contained an invoke which unwinds out of it, so any invoke we'd
259  // encounter must unwind to some child of the catch.
260  if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
261  continue;
262 
263  Instruction *ChildPad = cast<Instruction>(Child);
264  auto Memo = MemoMap.find(ChildPad);
265  if (Memo == MemoMap.end()) {
266  // Haven't figured out this child pad yet; queue it.
267  Worklist.push_back(ChildPad);
268  continue;
269  }
270  // We've already checked this child, but might have found that
271  // it offers no proof either way.
272  Value *ChildUnwindDestToken = Memo->second;
273  if (!ChildUnwindDestToken)
274  continue;
275  // We already know the child's unwind dest, which can either
276  // be ConstantTokenNone to indicate unwind to caller, or can
277  // be another child of the catchpad. Only the former indicates
278  // the unwind dest of the catchswitch.
279  if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
280  UnwindDestToken = ChildUnwindDestToken;
281  break;
282  }
283  assert(getParentPad(ChildUnwindDestToken) == CatchPad);
284  }
285  }
286  }
287  } else {
288  auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
289  for (User *U : CleanupPad->users()) {
290  if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
291  if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
292  UnwindDestToken = RetUnwindDest->getFirstNonPHI();
293  else
294  UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
295  break;
296  }
297  Value *ChildUnwindDestToken;
298  if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
299  ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
300  } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
301  Instruction *ChildPad = cast<Instruction>(U);
302  auto Memo = MemoMap.find(ChildPad);
303  if (Memo == MemoMap.end()) {
304  // Haven't resolved this child yet; queue it and keep searching.
305  Worklist.push_back(ChildPad);
306  continue;
307  }
308  // We've checked this child, but still need to ignore it if it
309  // had no proof either way.
310  ChildUnwindDestToken = Memo->second;
311  if (!ChildUnwindDestToken)
312  continue;
313  } else {
314  // Not a relevant user of the cleanuppad
315  continue;
316  }
317  // In a well-formed program, the child/invoke must either unwind to
318  // an(other) child of the cleanup, or exit the cleanup. In the
319  // first case, continue searching.
320  if (isa<Instruction>(ChildUnwindDestToken) &&
321  getParentPad(ChildUnwindDestToken) == CleanupPad)
322  continue;
323  UnwindDestToken = ChildUnwindDestToken;
324  break;
325  }
326  }
327  // If we haven't found an unwind dest for CurrentPad, we may have queued its
328  // children, so move on to the next in the worklist.
329  if (!UnwindDestToken)
330  continue;
331 
332  // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
333  // any ancestors of CurrentPad up to but not including UnwindDestToken's
334  // parent pad. Record this in the memo map, and check to see if the
335  // original EHPad being queried is one of the ones exited.
336  Value *UnwindParent;
337  if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
338  UnwindParent = getParentPad(UnwindPad);
339  else
340  UnwindParent = nullptr;
341  bool ExitedOriginalPad = false;
342  for (Instruction *ExitedPad = CurrentPad;
343  ExitedPad && ExitedPad != UnwindParent;
344  ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
345  // Skip over catchpads since they just follow their catchswitches.
346  if (isa<CatchPadInst>(ExitedPad))
347  continue;
348  MemoMap[ExitedPad] = UnwindDestToken;
349  ExitedOriginalPad |= (ExitedPad == EHPad);
350  }
351 
352  if (ExitedOriginalPad)
353  return UnwindDestToken;
354 
355  // Continue the search.
356  }
357 
358  // No definitive information is contained within this funclet.
359  return nullptr;
360 }
361 
362 /// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
363 /// return that pad instruction. If it unwinds to caller, return
364 /// ConstantTokenNone. If it does not have a definitive unwind destination,
365 /// return nullptr.
366 ///
367 /// This routine gets invoked for calls in funclets in inlinees when inlining
368 /// an invoke. Since many funclets don't have calls inside them, it's queried
369 /// on-demand rather than building a map of pads to unwind dests up front.
370 /// Determining a funclet's unwind dest may require recursively searching its
371 /// descendants, and also ancestors and cousins if the descendants don't provide
372 /// an answer. Since most funclets will have their unwind dest immediately
373 /// available as the unwind dest of a catchswitch or cleanupret, this routine
374 /// searches top-down from the given pad and then up. To avoid worst-case
375 /// quadratic run-time given that approach, it uses a memo map to avoid
376 /// re-processing funclet trees. The callers that rewrite the IR as they go
377 /// take advantage of this, for correctness, by checking/forcing rewritten
378 /// pads' entries to match the original callee view.
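379 /// For example, querying a cleanuppad whose only cleanupret has no unwind
380 /// successor yields ConstantTokenNone ("unwind to caller"), while querying a
381 /// catchswitch carrying "unwind label %outer.pad" yields the first non-PHI
382 /// instruction of %outer.pad. (Illustrative block name.)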
379 static Value *getUnwindDestToken(Instruction *EHPad,
380  UnwindDestMemoTy &MemoMap) {
381  // Catchpads unwind to the same place as their catchswitch;
382  // redirect any queries on catchpads so the code below can
383  // deal with just catchswitches and cleanuppads.
384  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
385  EHPad = CPI->getCatchSwitch();
386 
387  // Check if we've already determined the unwind dest for this pad.
388  auto Memo = MemoMap.find(EHPad);
389  if (Memo != MemoMap.end())
390  return Memo->second;
391 
392  // Search EHPad and, if necessary, its descendants.
393  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
394  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
395  if (UnwindDestToken)
396  return UnwindDestToken;
397 
398  // No information is available for this EHPad from itself or any of its
399  // descendants. An unwind all the way out to a pad in the caller would
400  // need also to agree with the unwind dest of the parent funclet, so
401  // search up the chain to try to find a funclet with information. Put
402  // null entries in the memo map to avoid re-processing as we go up.
403  MemoMap[EHPad] = nullptr;
404 #ifndef NDEBUG
405  SmallPtrSet<Instruction *, 4> TempMemos;
406  TempMemos.insert(EHPad);
407 #endif
408  Instruction *LastUselessPad = EHPad;
409  Value *AncestorToken;
410  for (AncestorToken = getParentPad(EHPad);
411  auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
412  AncestorToken = getParentPad(AncestorToken)) {
413  // Skip over catchpads since they just follow their catchswitches.
414  if (isa<CatchPadInst>(AncestorPad))
415  continue;
416  // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
417  // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
418  // call to getUnwindDestToken, that would mean that AncestorPad had no
419  // information in itself, its descendants, or its ancestors. If that
420  // were the case, then we should also have recorded the lack of information
421  // for the descendant that we're coming from. So assert that we don't
422  // find a null entry in the MemoMap for AncestorPad.
423  assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
424  auto AncestorMemo = MemoMap.find(AncestorPad);
425  if (AncestorMemo == MemoMap.end()) {
426  UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
427  } else {
428  UnwindDestToken = AncestorMemo->second;
429  }
430  if (UnwindDestToken)
431  break;
432  LastUselessPad = AncestorPad;
433  MemoMap[LastUselessPad] = nullptr;
434 #ifndef NDEBUG
435  TempMemos.insert(LastUselessPad);
436 #endif
437  }
438 
439  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
440  // returned nullptr (and likewise for EHPad and any of its ancestors up to
441  // LastUselessPad), so LastUselessPad has no information from below. Since
442  // getUnwindDestTokenHelper must investigate all downward paths through
443  // no-information nodes to prove that a node has no information like this,
444  // and since any time it finds information it records it in the MemoMap for
445  // not just the immediately-containing funclet but also any ancestors also
446  // exited, it must be the case that, walking downward from LastUselessPad,
447  // visiting just those nodes which have not been mapped to an unwind dest
448  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
449  // they are just used to keep getUnwindDestTokenHelper from repeating work),
450  // any node visited must have been exhaustively searched with no information
451  // for it found.
452  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
453  while (!Worklist.empty()) {
454  Instruction *UselessPad = Worklist.pop_back_val();
455  auto Memo = MemoMap.find(UselessPad);
456  if (Memo != MemoMap.end() && Memo->second) {
457  // Here the name 'UselessPad' is a bit of a misnomer, because we've found
458  // that it is a funclet that does have information about unwinding to
459  // a particular destination; its parent was a useless pad.
460  // Since its parent has no information, the unwind edge must not escape
461  // the parent, and must target a sibling of this pad. This local unwind
462  // gives us no information about EHPad. Leave it and the subtree rooted
463  // at it alone.
464  assert(getParentPad(Memo->second) == getParentPad(UselessPad));
465  continue;
466  }
467  // We know we don't have information for UselessPad. If it has an entry in
468  // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
469  // added on this invocation of getUnwindDestToken; if a previous invocation
470  // recorded nullptr, it would have had to prove that the ancestors of
471  // UselessPad, which include LastUselessPad, had no information, and that
472  // in turn would have required proving that the descendants of
473  // LastUselessPad, which include EHPad, have no information about
474  // LastUselessPad, which would imply that EHPad was mapped to nullptr in
475  // the MemoMap on that invocation, which isn't the case if we got here.
476  assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
477  // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
478  // information that we'd be contradicting by making a map entry for it
479  // (which is something that getUnwindDestTokenHelper must have proved for
480  // us to get here). Just assert on its direct users here; the checks in
481  // this downward walk at its descendants will verify that they don't have
482  // any unwind edges that exit 'UselessPad' either (i.e. they either have no
483  // unwind edges or unwind to a sibling).
484  MemoMap[UselessPad] = UnwindDestToken;
485  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
486  assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
487  for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
488  auto *CatchPad = HandlerBlock->getFirstNonPHI();
489  for (User *U : CatchPad->users()) {
490  assert(
491  (!isa<InvokeInst>(U) ||
492  (getParentPad(
493  cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
494  CatchPad)) &&
495  "Expected useless pad");
496  if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
497  Worklist.push_back(cast<Instruction>(U));
498  }
499  }
500  } else {
501  assert(isa<CleanupPadInst>(UselessPad));
502  for (User *U : UselessPad->users()) {
503  assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
504  assert((!isa<InvokeInst>(U) ||
505  (getParentPad(
506  cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
507  UselessPad)) &&
508  "Expected useless pad");
509  if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
510  Worklist.push_back(cast<Instruction>(U));
511  }
512  }
513  }
514 
515  return UnwindDestToken;
516 }
517 
518 /// When we inline a basic block into an invoke,
519 /// we have to turn all of the calls that can throw into invokes.
520 /// This function analyzes BB to see if there are any calls, and if so,
521 /// it rewrites them to be invokes that jump to UnwindEdge; the caller is then
522 /// responsible for updating any PHI nodes in the unwind destination block.
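/// For example, a cloned callee block containing
///   %v = call i32 @may_throw()
/// is split after the call and the call becomes
///   %v = invoke i32 @may_throw() to label %split unwind label %unwind.edge
/// where %unwind.edge is the caller-side unwind destination passed in as
/// UnwindEdge. (Illustrative IR; the value and label names are made up.)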
523 static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
524  BasicBlock *BB, BasicBlock *UnwindEdge,
525  UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
526  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
527  Instruction *I = &*BBI++;
528 
529  // We only need to check for function calls: inlined invoke
530  // instructions require no special handling.
531  CallInst *CI = dyn_cast<CallInst>(I);
532 
533  if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
534  continue;
535 
536  // We do not need to (and in fact, cannot) convert possibly throwing calls
537  // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
538  // invokes. The caller's "segment" of the deoptimization continuation
539  // attached to the newly inlined @llvm.experimental.deoptimize
540  // (resp. @llvm.experimental.guard) call should contain the exception
541  // handling logic, if any.
542  if (auto *F = CI->getCalledFunction())
543  if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
544  F->getIntrinsicID() == Intrinsic::experimental_guard)
545  continue;
546 
547  if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
548  // This call is nested inside a funclet. If that funclet has an unwind
549  // destination within the inlinee, then unwinding out of this call would
550  // be UB. Rewriting this call to an invoke which targets the inlined
551  // invoke's unwind dest would give the call's parent funclet multiple
552  // unwind destinations, which is something that subsequent EH table
553  // generation can't handle and that the verifier rejects. So when we
554  // see such a call, leave it as a call.
555  auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
556  Value *UnwindDestToken =
557  getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
558  if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
559  continue;
560 #ifndef NDEBUG
561  Instruction *MemoKey;
562  if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
563  MemoKey = CatchPad->getCatchSwitch();
564  else
565  MemoKey = FuncletPad;
566  assert(FuncletUnwindMap->count(MemoKey) &&
567  (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
568  "must get memoized to avoid confusing later searches");
569 #endif // NDEBUG
570  }
571 
572  changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
573  return BB;
574  }
575  return nullptr;
576 }
577 
578 /// If we inlined an invoke site, we need to convert calls
579 /// in the body of the inlined function into invokes.
580 ///
581 /// II is the invoke instruction being inlined. FirstNewBlock is the first
582 /// block of the inlined code (the last block is the end of the function),
583 /// and InlineCodeInfo is information about the code that got inlined.
584 static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
585  ClonedCodeInfo &InlinedCodeInfo) {
586  BasicBlock *InvokeDest = II->getUnwindDest();
587 
588  Function *Caller = FirstNewBlock->getParent();
589 
590  // The inlined code is currently at the end of the function, scan from the
591  // start of the inlined code to its end, checking for stuff we need to
592  // rewrite.
593  LandingPadInliningInfo Invoke(II);
594 
595  // Get all of the inlined landing pad instructions.
596  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
597  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
598  I != E; ++I)
599  if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
600  InlinedLPads.insert(II->getLandingPadInst());
601 
602  // Append the clauses from the outer landing pad instruction into the inlined
603  // landing pad instructions.
604  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
605  for (LandingPadInst *InlinedLPad : InlinedLPads) {
606  unsigned OuterNum = OuterLPad->getNumClauses();
607  InlinedLPad->reserveClauses(OuterNum);
608  for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
609  InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
610  if (OuterLPad->isCleanup())
611  InlinedLPad->setCleanup(true);
612  }
613 
614  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
615  BB != E; ++BB) {
616  if (InlinedCodeInfo.ContainsCalls)
617  if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
618  &*BB, Invoke.getOuterResumeDest()))
619  // Update any PHI nodes in the exceptional block to indicate that there
620  // is now a new entry in them.
621  Invoke.addIncomingPHIValuesFor(NewBB);
622 
623  // Forward any resumes that are remaining here.
624  if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
625  Invoke.forwardResume(RI, InlinedLPads);
626  }
627 
628  // Now that everything is happy, we have one final detail. The PHI nodes in
629  // the exception destination block still have entries due to the original
630  // invoke instruction. Eliminate these entries (which might even delete the
631  // PHI node) now.
632  InvokeDest->removePredecessor(II->getParent());
633 }
634 
635 /// If we inlined an invoke site, we need to convert calls
636 /// in the body of the inlined function into invokes.
637 ///
638 /// II is the invoke instruction being inlined. FirstNewBlock is the first
639 /// block of the inlined code (the last block is the end of the function),
640 /// and InlineCodeInfo is information about the code that got inlined.
641 static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
642  ClonedCodeInfo &InlinedCodeInfo) {
643  BasicBlock *UnwindDest = II->getUnwindDest();
644  Function *Caller = FirstNewBlock->getParent();
645 
646  assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");
647 
648  // If there are PHI nodes in the unwind destination block, we need to keep
649  // track of which values came into them from the invoke before removing the
650  // edge from this block.
651  SmallVector<Value *, 8> UnwindDestPHIValues;
652  BasicBlock *InvokeBB = II->getParent();
653  for (Instruction &I : *UnwindDest) {
654  // Save the value to use for this edge.
655  PHINode *PHI = dyn_cast<PHINode>(&I);
656  if (!PHI)
657  break;
658  UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
659  }
660 
661  // Add incoming-PHI values to the unwind destination block for the given basic
662  // block, using the values for the original invoke's source block.
663  auto UpdatePHINodes = [&](BasicBlock *Src) {
664  BasicBlock::iterator I = UnwindDest->begin();
665  for (Value *V : UnwindDestPHIValues) {
666  PHINode *PHI = cast<PHINode>(I);
667  PHI->addIncoming(V, Src);
668  ++I;
669  }
670  };
671 
672  // This connects all the instructions which 'unwind to caller' to the invoke
673  // destination.
674  UnwindDestMemoTy FuncletUnwindMap;
675  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
676  BB != E; ++BB) {
677  if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
678  if (CRI->unwindsToCaller()) {
679  auto *CleanupPad = CRI->getCleanupPad();
680  CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
681  CRI->eraseFromParent();
682  UpdatePHINodes(&*BB);
683  // Finding a cleanupret with an unwind destination would confuse
684  // subsequent calls to getUnwindDestToken, so map the cleanuppad
685  // to short-circuit any such calls and recognize this as an "unwind
686  // to caller" cleanup.
687  assert(!FuncletUnwindMap.count(CleanupPad) ||
688  isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
689  FuncletUnwindMap[CleanupPad] =
690  ConstantTokenNone::get(Caller->getContext());
691  }
692  }
693 
694  Instruction *I = BB->getFirstNonPHI();
695  if (!I->isEHPad())
696  continue;
697 
698  Instruction *Replacement = nullptr;
699  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
700  if (CatchSwitch->unwindsToCaller()) {
701  Value *UnwindDestToken;
702  if (auto *ParentPad =
703  dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
704  // This catchswitch is nested inside another funclet. If that
705  // funclet has an unwind destination within the inlinee, then
706  // unwinding out of this catchswitch would be UB. Rewriting this
707  // catchswitch to unwind to the inlined invoke's unwind dest would
708  // give the parent funclet multiple unwind destinations, which is
709  // something that subsequent EH table generation can't handle and
710  // that the verifier rejects. So when we see such a catchswitch, leave
711  // it as "unwind to caller".
712  UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
713  if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
714  continue;
715  } else {
716  // This catchswitch has no parent to inherit constraints from, and
717  // none of its descendants can have an unwind edge that exits it and
718  // targets another funclet in the inlinee. It may or may not have a
719  // descendant that definitively has an unwind to caller. In either
720  // case, we'll have to assume that any unwinds out of it may need to
721  // be routed to the caller, so treat it as though it has a definitive
722  // unwind to caller.
723  UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
724  }
725  auto *NewCatchSwitch = CatchSwitchInst::Create(
726  CatchSwitch->getParentPad(), UnwindDest,
727  CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
728  CatchSwitch);
729  for (BasicBlock *PadBB : CatchSwitch->handlers())
730  NewCatchSwitch->addHandler(PadBB);
731  // Propagate info for the old catchswitch over to the new one in
732  // the unwind map. This also serves to short-circuit any subsequent
733  // checks for the unwind dest of this catchswitch, which would get
734  // confused if they found the outer handler in the callee.
735  FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
736  Replacement = NewCatchSwitch;
737  }
738  } else if (!isa<FuncletPadInst>(I)) {
739  llvm_unreachable("unexpected EHPad!");
740  }
741 
742  if (Replacement) {
743  Replacement->takeName(I);
744  I->replaceAllUsesWith(Replacement);
745  I->eraseFromParent();
746  UpdatePHINodes(&*BB);
747  }
748  }
749 
750  if (InlinedCodeInfo.ContainsCalls)
751  for (Function::iterator BB = FirstNewBlock->getIterator(),
752  E = Caller->end();
753  BB != E; ++BB)
754  if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
755  &*BB, UnwindDest, &FuncletUnwindMap))
756  // Update any PHI nodes in the exceptional block to indicate that there
757  // is now a new entry in them.
758  UpdatePHINodes(NewBB);
759 
760  // Now that everything is happy, we have one final detail. The PHI nodes in
761  // the exception destination block still have entries due to the original
762  // invoke instruction. Eliminate these entries (which might even delete the
763  // PHI node) now.
764  UnwindDest->removePredecessor(InvokeBB);
765 }
766 
767 /// When inlining a call site that has !llvm.mem.parallel_loop_access or
768 /// llvm.access.group metadata, that metadata should be propagated to all
769 /// memory-accessing cloned instructions.
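/// For example, if the call site carries !llvm.access.group !0 because it sits
/// inside a loop marked with llvm.loop.parallel_accesses, every load and store
/// cloned from the callee is added to access group !0 as well, so the loop can
/// still be recognized as parallel after inlining. (Illustrative metadata id.)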
770 static void PropagateParallelLoopAccessMetadata(CallSite CS,
771  ValueToValueMapTy &VMap) {
772  MDNode *M =
773  CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
774  MDNode *CallAccessGroup =
775  CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
776  if (!M && !CallAccessGroup)
777  return;
778 
779  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
780  VMI != VMIE; ++VMI) {
781  if (!VMI->second)
782  continue;
783 
784  Instruction *NI = dyn_cast<Instruction>(VMI->second);
785  if (!NI)
786  continue;
787 
788  if (M) {
789  if (MDNode *PM =
790  NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
791  M = MDNode::concatenate(PM, M);
792  NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
793  } else if (NI->mayReadOrWriteMemory()) {
794  NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
795  }
796  }
797 
798  if (NI->mayReadOrWriteMemory()) {
799  MDNode *UnitedAccGroups = uniteAccessGroups(
800  NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
801  NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
802  }
803  }
804 }
805 
806 /// When inlining a function that contains noalias scope metadata,
807 /// this metadata needs to be cloned so that the inlined blocks
808 /// have different "unique scopes" at every call site. Were this not done, then
809 /// aliasing scopes from a function inlined into a caller multiple times could
810 /// not be differentiated (and this would lead to miscompiles because the
811 /// non-aliasing property communicated by the metadata could have
812 /// call-site-specific control dependencies).
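/// For example, if a callee that carries !alias.scope/!noalias metadata is
/// inlined twice into the same caller, each inlined copy receives its own
/// freshly cloned scope nodes, so a scoped-noalias fact established for one
/// copy can never be applied to accesses belonging to the other copy.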
813 static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
814  const Function *CalledFunc = CS.getCalledFunction();
815  SetVector<const MDNode *> MD;
816 
817  // Note: We could clone the metadata only when it is already used in the
818  // caller. I'm omitting that check here because it might confuse
819  // inter-procedural alias analysis passes. We can revisit this if it becomes
820  // an efficiency or overhead problem.
821 
822  for (const BasicBlock &I : *CalledFunc)
823  for (const Instruction &J : I) {
824  if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
825  MD.insert(M);
826  if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
827  MD.insert(M);
828  }
829 
830  if (MD.empty())
831  return;
832 
833  // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
834  // the set.
835  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
836  while (!Queue.empty()) {
837  const MDNode *M = cast<MDNode>(Queue.pop_back_val());
838  for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
839  if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
840  if (MD.insert(M1))
841  Queue.push_back(M1);
842  }
843 
844  // Now we have a complete set of all metadata in the chains used to specify
845  // the noalias scopes and the lists of those scopes.
846  SmallVector<TempMDTuple, 16> DummyNodes;
847  DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
848  for (const MDNode *I : MD) {
849  DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
850  MDMap[I].reset(DummyNodes.back().get());
851  }
852 
853  // Create new metadata nodes to replace the dummy nodes, replacing old
854  // metadata references with either a dummy node or an already-created new
855  // node.
856  for (const MDNode *I : MD) {
857  SmallVector<Metadata *, 4> NewOps;
858  for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
859  const Metadata *V = I->getOperand(i);
860  if (const MDNode *M = dyn_cast<MDNode>(V))
861  NewOps.push_back(MDMap[M]);
862  else
863  NewOps.push_back(const_cast<Metadata *>(V));
864  }
865 
866  MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
867  MDTuple *TempM = cast<MDTuple>(MDMap[I]);
868  assert(TempM->isTemporary() && "Expected temporary node");
869 
870  TempM->replaceAllUsesWith(NewM);
871  }
872 
873  // Now replace the metadata in the new inlined instructions with the
874  // replacements from the map.
875  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
876  VMI != VMIE; ++VMI) {
877  if (!VMI->second)
878  continue;
879 
880  Instruction *NI = dyn_cast<Instruction>(VMI->second);
881  if (!NI)
882  continue;
883 
884  if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
885  MDNode *NewMD = MDMap[M];
886  // If the call site also had alias scope metadata (a list of scopes to
887  // which instructions inside it might belong), propagate those scopes to
888  // the inlined instructions.
889  if (MDNode *CSM =
890  CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
891  NewMD = MDNode::concatenate(NewMD, CSM);
892  NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
893  } else if (NI->mayReadOrWriteMemory()) {
894  if (MDNode *M =
895  CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
896  NI->setMetadata(LLVMContext::MD_alias_scope, M);
897  }
898 
899  if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
900  MDNode *NewMD = MDMap[M];
901  // If the call site also had noalias metadata (a list of scopes with
902  // which instructions inside it don't alias), propagate those scopes to
903  // the inlined instructions.
904  if (MDNode *CSM =
905  CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
906  NewMD = MDNode::concatenate(NewMD, CSM);
907  NI->setMetadata(LLVMContext::MD_noalias, NewMD);
908  } else if (NI->mayReadOrWriteMemory()) {
909  if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
910  NI->setMetadata(LLVMContext::MD_noalias, M);
911  }
912  }
913 }
914 
915 /// If the inlined function has noalias arguments,
916 /// then add new alias scopes for each noalias argument, tag the mapped noalias
917 /// parameters with noalias metadata specifying the new scope, and tag all
918 /// non-derived loads, stores and memory intrinsics with the new alias scopes.
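/// For example, after inlining a callee declared as
///   define void @f(float* noalias %a, float* %b)
/// accesses derived only from %a are tagged with !alias.scope naming a fresh
/// anonymous scope created for %a, and accesses that provably cannot be based
/// on (or reach a capture of) %a are tagged with !noalias naming that same
/// scope. (Illustrative signature; the scope nodes are anonymous MDNodes.)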
919 static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
920  const DataLayout &DL, AAResults *CalleeAAR) {
921  if (!EnableNoAliasConversion)
922  return;
923 
924  const Function *CalledFunc = CS.getCalledFunction();
925  SmallVector<const Argument *, 4> NoAliasArgs;
926 
927  for (const Argument &Arg : CalledFunc->args())
928  if (Arg.hasNoAliasAttr() && !Arg.use_empty())
929  NoAliasArgs.push_back(&Arg);
930 
931  if (NoAliasArgs.empty())
932  return;
933 
934  // To do a good job, if a noalias variable is captured, we need to know if
935  // the capture point dominates the particular use we're considering.
936  DominatorTree DT;
937  DT.recalculate(const_cast<Function&>(*CalledFunc));
938 
939  // noalias indicates that pointer values based on the argument do not alias
940  // pointer values which are not based on it. So we add a new "scope" for each
941  // noalias function argument. Accesses using pointers based on that argument
942  // become part of that alias scope, accesses using pointers not based on that
943  // argument are tagged as noalias with that scope.
944 
945  DenseMap<const Argument *, MDNode *> NewScopes;
946  MDBuilder MDB(CalledFunc->getContext());
947 
948  // Create a new scope domain for this function.
949  MDNode *NewDomain =
950  MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
951  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
952  const Argument *A = NoAliasArgs[i];
953 
954  std::string Name = CalledFunc->getName();
955  if (A->hasName()) {
956  Name += ": %";
957  Name += A->getName();
958  } else {
959  Name += ": argument ";
960  Name += utostr(i);
961  }
962 
963  // Note: We always create a new anonymous root here. This is true regardless
964  // of the linkage of the callee because the aliasing "scope" is not just a
965  // property of the callee, but also all control dependencies in the caller.
966  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
967  NewScopes.insert(std::make_pair(A, NewScope));
968  }
969 
970  // Iterate over all new instructions in the map; for all memory-access
971  // instructions, add the alias scope metadata.
972  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
973  VMI != VMIE; ++VMI) {
974  if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
975  if (!VMI->second)
976  continue;
977 
978  Instruction *NI = dyn_cast<Instruction>(VMI->second);
979  if (!NI)
980  continue;
981 
982  bool IsArgMemOnlyCall = false, IsFuncCall = false;
983  SmallVector<const Value *, 2> PtrArgs;
984 
985  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
986  PtrArgs.push_back(LI->getPointerOperand());
987  else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
988  PtrArgs.push_back(SI->getPointerOperand());
989  else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
990  PtrArgs.push_back(VAAI->getPointerOperand());
991  else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
992  PtrArgs.push_back(CXI->getPointerOperand());
993  else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
994  PtrArgs.push_back(RMWI->getPointerOperand());
995  else if (const auto *Call = dyn_cast<CallBase>(I)) {
996  // If we know that the call does not access memory, then we'll still
997  // know that about the inlined clone of this call site, and we don't
998  // need to add metadata.
999  if (Call->doesNotAccessMemory())
1000  continue;
1001 
1002  IsFuncCall = true;
1003  if (CalleeAAR) {
1004  FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
1005  if (MRB == FMRB_OnlyAccessesArgumentPointees ||
1006  MRB == FMRB_OnlyReadsArgumentPointees)
1007  IsArgMemOnlyCall = true;
1008  }
1009 
1010  for (Value *Arg : Call->args()) {
1011  // We need to check the underlying objects of all arguments, not just
1012  // the pointer arguments, because we might be passing pointers as
1013  // integers, etc.
1014  // However, if we know that the call only accesses pointer arguments,
1015  // then we only need to check the pointer arguments.
1016  if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
1017  continue;
1018 
1019  PtrArgs.push_back(Arg);
1020  }
1021  }
1022 
1023  // If we found no pointers, then this instruction is not suitable for
1024  // pairing with an instruction to receive aliasing metadata.
1025  // However, if this is a call, we might just alias with none of the
1026  // noalias arguments.
1027  if (PtrArgs.empty() && !IsFuncCall)
1028  continue;
1029 
1030  // It is possible that there is only one underlying object, but you
1031  // need to go through several PHIs to see it, and thus could be
1032  // repeated in the Objects list.
1033  SmallPtrSet<const Value *, 4> ObjSet;
1034  SmallVector<Metadata *, 4> Scopes, NoAliases;
1035 
1037  for (const Value *V : PtrArgs) {
1038  SmallVector<const Value *, 4> Objects;
1039  GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);
1040 
1041  for (const Value *O : Objects)
1042  ObjSet.insert(O);
1043  }
1044 
1045  // Figure out if we're derived from anything that is not a noalias
1046  // argument.
1047  bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
1048  for (const Value *V : ObjSet) {
1049  // Is this value a constant that cannot be derived from any pointer
1050  // value (we need to exclude constant expressions, for example, that
1051  // are formed from arithmetic on global symbols).
1052  bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
1053  isa<ConstantPointerNull>(V) ||
1054  isa<ConstantDataVector>(V) || isa<UndefValue>(V);
1055  if (IsNonPtrConst)
1056  continue;
1057 
1058  // If this is anything other than a noalias argument, then we cannot
1059  // completely describe the aliasing properties using alias.scope
1060  // metadata (and, thus, won't add any).
1061  if (const Argument *A = dyn_cast<Argument>(V)) {
1062  if (!A->hasNoAliasAttr())
1063  UsesAliasingPtr = true;
1064  } else {
1065  UsesAliasingPtr = true;
1066  }
1067 
1068  // If this is not some identified function-local object (which cannot
1069  // directly alias a noalias argument), or some other argument (which,
1070  // by definition, also cannot alias a noalias argument), then we could
1071  // alias a noalias argument that has been captured.
1072  if (!isa<Argument>(V) &&
1073  !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
1074  CanDeriveViaCapture = true;
1075  }
1076 
1077  // A function call can always get captured noalias pointers (via other
1078  // parameters, globals, etc.).
1079  if (IsFuncCall && !IsArgMemOnlyCall)
1080  CanDeriveViaCapture = true;
1081 
1082  // First, we want to figure out all of the sets with which we definitely
1083  // don't alias. Iterate over all noalias sets, and add those for which:
1084  // 1. The noalias argument is not in the set of objects from which we
1085  // definitely derive.
1086  // 2. The noalias argument has not yet been captured.
1087  // An arbitrary function that might load pointers could see captured
1088  // noalias arguments via other noalias arguments or globals, and so we
1089  // must always check for prior capture.
1090  for (const Argument *A : NoAliasArgs) {
1091  if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
1092  // It might be tempting to skip the
1093  // PointerMayBeCapturedBefore check if
1094  // A->hasNoCaptureAttr() is true, but this is
1095  // incorrect because nocapture only guarantees
1096  // that no copies outlive the function, not
1097  // that the value cannot be locally captured.
1098  !PointerMayBeCapturedBefore(A,
1099  /* ReturnCaptures */ false,
1100  /* StoreCaptures */ false, I, &DT)))
1101  NoAliases.push_back(NewScopes[A]);
1102  }
1103 
1104  if (!NoAliases.empty())
1105  NI->setMetadata(LLVMContext::MD_noalias,
1106  MDNode::concatenate(
1107  NI->getMetadata(LLVMContext::MD_noalias),
1108  MDNode::get(CalledFunc->getContext(), NoAliases)));
1109 
1110  // Next, we want to figure out all of the sets to which we might belong.
1111  // We might belong to a set if the noalias argument is in the set of
1112  // underlying objects. If there is some non-noalias argument in our list
1113  // of underlying objects, then we cannot add a scope because the fact
1114  // that some access does not alias with any set of our noalias arguments
1115  // cannot itself guarantee that it does not alias with this access
1116  // (because there is some pointer of unknown origin involved and the
1117  // other access might also depend on this pointer). We also cannot add
1118  // scopes to arbitrary functions unless we know they don't access any
1119  // non-parameter pointer-values.
1120  bool CanAddScopes = !UsesAliasingPtr;
1121  if (CanAddScopes && IsFuncCall)
1122  CanAddScopes = IsArgMemOnlyCall;
1123 
1124  if (CanAddScopes)
1125  for (const Argument *A : NoAliasArgs) {
1126  if (ObjSet.count(A))
1127  Scopes.push_back(NewScopes[A]);
1128  }
1129 
1130  if (!Scopes.empty())
1131  NI->setMetadata(
1132  LLVMContext::MD_alias_scope,
1133  MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
1134  MDNode::get(CalledFunc->getContext(), Scopes)));
1135  }
1136  }
1137 }
1138 
1139 /// If the inlined function has non-byval align arguments, then
1140 /// add @llvm.assume-based alignment assumptions to preserve this information.
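/// For example, for a callee parameter declared "i32* align 16 %p", the inliner
/// emits an @llvm.assume call at the call site asserting that the actual
/// pointer argument is 16-byte aligned, unless that alignment can already be
/// proven there. (Illustrative parameter; the exact expansion comes from
/// IRBuilder::CreateAlignmentAssumption.)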
1141 static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
1142  if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1143  return;
1144 
1145  AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller());
1146  auto &DL = CS.getCaller()->getParent()->getDataLayout();
1147 
1148  // To avoid inserting redundant assumptions, we should check for assumptions
1149  // already in the caller. To do this, we might need a DT of the caller.
1150  DominatorTree DT;
1151  bool DTCalculated = false;
1152 
1153  Function *CalledFunc = CS.getCalledFunction();
1154  for (Argument &Arg : CalledFunc->args()) {
1155  unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
1156  if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
1157  if (!DTCalculated) {
1158  DT.recalculate(*CS.getCaller());
1159  DTCalculated = true;
1160  }
1161 
1162  // If we can already prove the asserted alignment in the context of the
1163  // caller, then don't bother inserting the assumption.
1164  Value *ArgVal = CS.getArgument(Arg.getArgNo());
1165  if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)
1166  continue;
1167 
1168  CallInst *NewAsmp = IRBuilder<>(CS.getInstruction())
1169  .CreateAlignmentAssumption(DL, ArgVal, Align);
1170  AC->registerAssumption(NewAsmp);
1171  }
1172  }
1173 }
1174 
1175 /// Once we have cloned code over from a callee into the caller,
1176 /// update the specified callgraph to reflect the changes we made.
1177 /// Note that it's possible that not all code was copied over, so only
1178 /// some edges of the callgraph may remain.
1179 static void UpdateCallGraphAfterInlining(CallSite CS,
1180  Function::iterator FirstNewBlock,
1181  ValueToValueMapTy &VMap,
1182  InlineFunctionInfo &IFI) {
1183  CallGraph &CG = *IFI.CG;
1184  const Function *Caller = CS.getCaller();
1185  const Function *Callee = CS.getCalledFunction();
1186  CallGraphNode *CalleeNode = CG[Callee];
1187  CallGraphNode *CallerNode = CG[Caller];
1188 
1189  // Since we inlined some uninlined call sites in the callee into the caller,
1190  // add edges from the caller to all of the callees of the callee.
1191  CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
1192 
1193  // Consider the case where CalleeNode == CallerNode.
1194  CallGraphNode::CalledFunctionsVector CallCache;
1195  if (CalleeNode == CallerNode) {
1196  CallCache.assign(I, E);
1197  I = CallCache.begin();
1198  E = CallCache.end();
1199  }
1200 
1201  for (; I != E; ++I) {
1202  const Value *OrigCall = I->first;
1203 
1204  ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
1205  // Only copy the edge if the call was inlined!
1206  if (VMI == VMap.end() || VMI->second == nullptr)
1207  continue;
1208 
1209  // If the call was inlined, but then constant folded, there is no edge to
1210  // add. Check for this case.
1211  auto *NewCall = dyn_cast<CallBase>(VMI->second);
1212  if (!NewCall)
1213  continue;
1214 
1215  // We do not treat intrinsic calls like real function calls because we
1216  // expect them to become inline code; do not add an edge for an intrinsic.
1217  if (NewCall->getCalledFunction() &&
1218  NewCall->getCalledFunction()->isIntrinsic())
1219  continue;
1220 
1221  // Remember that this call site got inlined for the client of
1222  // InlineFunction.
1223  IFI.InlinedCalls.push_back(NewCall);
1224 
1225  // It's possible that inlining the callsite will cause it to go from an
1226  // indirect to a direct call by resolving a function pointer. If this
1227  // happens, set the callee of the new call site to a more precise
1228  // destination. This can also happen if the call graph node of the caller
1229  // was just unnecessarily imprecise.
1230  if (!I->second->getFunction())
1231  if (Function *F = NewCall->getCalledFunction()) {
1232  // Indirect call site resolved to direct call.
1233  CallerNode->addCalledFunction(NewCall, CG[F]);
1234 
1235  continue;
1236  }
1237 
1238  CallerNode->addCalledFunction(NewCall, I->second);
1239  }
1240 
1241  // Update the call graph by deleting the edge from Callee to Caller. We must
1242  // do this after the loop above in case Caller and Callee are the same.
1243  CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
1244 }
1245 
1246 static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
1247  BasicBlock *InsertBlock,
1248  InlineFunctionInfo &IFI) {
1249  Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
1250  IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1251 
1252  Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
1253 
1254  // Always generate a memcpy of alignment 1 here because we don't know
1255  // the alignment of the src pointer. Other optimizations can infer
1256  // better alignment.
1257  Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size);
1258 }
1259 
1260 /// When inlining a call site that has a byval argument,
1261 /// we have to make the implicit memcpy explicit by adding it.
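/// For example, a "%struct.S* byval align 8 %s" argument generally becomes a
/// fresh alloca of %struct.S in the caller's entry block plus a memcpy from
/// the original pointer ahead of the inlined body (see HandleByValArgumentInit
/// above), unless the callee only reads memory and the incoming pointer is, or
/// can be made, sufficiently aligned. (Illustrative type and argument name.)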
1262 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
1263  const Function *CalledFunc,
1264  InlineFunctionInfo &IFI,
1265  unsigned ByValAlignment) {
1266  PointerType *ArgTy = cast<PointerType>(Arg->getType());
1267  Type *AggTy = ArgTy->getElementType();
1268 
1269  Function *Caller = TheCall->getFunction();
1270  const DataLayout &DL = Caller->getParent()->getDataLayout();
1271 
1272  // If the called function is readonly, then it could not mutate the caller's
1273  // copy of the byval'd memory. In this case, it is safe to elide the copy and
1274  // temporary.
1275  if (CalledFunc->onlyReadsMemory()) {
1276  // If the byval argument has a specified alignment that is greater than the
1277  // passed in pointer, then we either have to round up the input pointer or
1278  // give up on this transformation.
1279  if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
1280  return Arg;
1281 
1282  AssumptionCache *AC =
1283  IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
1284 
1285  // If the pointer is already known to be sufficiently aligned, or if we can
1286  // round it up to a larger alignment, then we don't need a temporary.
1287  if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
1288  ByValAlignment)
1289  return Arg;
1290 
1291  // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1292  // for code quality, but rarely happens and is required for correctness.
1293  }
1294 
1295  // Create the alloca. If we have DataLayout, use nice alignment.
1296  unsigned Align = DL.getPrefTypeAlignment(AggTy);
1297 
1298  // If the byval had an alignment specified, we *must* use at least that
1299  // alignment, as it is required by the byval argument (and uses of the
1300  // pointer inside the callee).
1301  Align = std::max(Align, ByValAlignment);
1302 
1303  Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(),
1304  nullptr, Align, Arg->getName(),
1305  &*Caller->begin()->begin());
1306  IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
1307 
1308  // Uses of the argument in the function should use our new alloca
1309  // instead.
1310  return NewAlloca;
1311 }
1312 
1313 // Check whether this Value is used by a lifetime intrinsic.
1314 static bool isUsedByLifetimeMarker(Value *V) {
1315  for (User *U : V->users())
1316  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
1317  if (II->isLifetimeStartOrEnd())
1318  return true;
1319  return false;
1320 }
1321 
1322 // Check whether the given alloca already has
1323 // lifetime.start or lifetime.end intrinsics.
1324 static bool hasLifetimeMarkers(AllocaInst *AI) {
1325  Type *Ty = AI->getType();
1326  Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
1327  Ty->getPointerAddressSpace());
1328  if (Ty == Int8PtrTy)
1329  return isUsedByLifetimeMarker(AI);
1330 
1331  // Do a scan to find all the casts to i8*.
1332  for (User *U : AI->users()) {
1333  if (U->getType() != Int8PtrTy) continue;
1334  if (U->stripPointerCasts() != AI) continue;
1335  if (isUsedByLifetimeMarker(U))
1336  return true;
1337  }
1338  return false;
1339 }
1340 
1341 /// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1342 /// block. Allocas used in inalloca calls and allocas of dynamic array size
1343 /// cannot be static.
1344 static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1345  return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
1346 }
1347 
1348 /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1349 /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1350 static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1351  LLVMContext &Ctx,
1352  DenseMap<const MDNode *, MDNode *> &IANodes) {
1353  auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
1354  return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
1355  IA);
1356 }
1357 
1358 /// Returns the LoopID for a loop which has been cloned from another
1359 /// function for inlining with the new inlined-at start and end locs.
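/// For example, a callee loop ID such as
///   !0 = distinct !{!0, !DILocation(line: 3, scope: !2), !1}
///   !1 = !{!"llvm.loop.unroll.disable"}
/// is rebuilt as a fresh distinct self-referential node whose DILocation
/// operands gain an inlinedAt chain ending at the call site, while operands
/// like !1 are copied unchanged. (Illustrative metadata.)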
1360 static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt,
1361  LLVMContext &Ctx,
1362  DenseMap<const MDNode *, MDNode *> &IANodes) {
1363  assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
1364  "Loop ID needs at least one operand");
1365  assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
1366  "Loop ID should refer to itself");
1367 
1368  // Save space for the self-referential LoopID.
1369  SmallVector<Metadata *, 4> MDs = {nullptr};
1370 
1371  for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) {
1372  Metadata *MD = OrigLoopId->getOperand(i);
1373  // Update the DILocations to encode the inlined-at metadata.
1374  if (DILocation *DL = dyn_cast<DILocation>(MD))
1375  MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes));
1376  else
1377  MDs.push_back(MD);
1378  }
1379 
1380  MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs);
1381  // Insert the self-referential LoopID.
1382  NewLoopID->replaceOperandWith(0, NewLoopID);
1383  return NewLoopID;
1384 }
1385 
1386 /// Update inlined instructions' line numbers to
1387 /// encode the location where these instructions are inlined.
1388 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1389  Instruction *TheCall, bool CalleeHasDebugInfo) {
1390  const DebugLoc &TheCallDL = TheCall->getDebugLoc();
1391  if (!TheCallDL)
1392  return;
1393 
1394  auto &Ctx = Fn->getContext();
1395  DILocation *InlinedAtNode = TheCallDL;
1396 
1397  // Create a unique call site, not to be confused with any other call from the
1398  // same location.
1399  InlinedAtNode = DILocation::getDistinct(
1400  Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
1401  InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
1402 
1403  // Cache the inlined-at nodes as they're built so they are reused; without
1404  // this, every instruction's inlined-at chain would be distinct from every
1405  // other instruction's.
1406  DenseMap<const MDNode *, MDNode *> IANodes;
1407 
1408  for (; FI != Fn->end(); ++FI) {
1409  for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
1410  BI != BE; ++BI) {
1411  // Loop metadata needs to be updated so that the start and end locs
1412  // reference inlined-at locations.
1413  if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) {
1414  MDNode *NewLoopID =
1415  inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes);
1416  BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
1417  }
1418 
1419  if (DebugLoc DL = BI->getDebugLoc()) {
1420  DebugLoc IDL =
1421  inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
1422  BI->setDebugLoc(IDL);
1423  continue;
1424  }
1425 
1426  if (CalleeHasDebugInfo)
1427  continue;
1428 
1429  // If the inlined instruction has no line number, make it look as if it
1430  // originates from the call location. This is important for
1431  // ((__always_inline__, __nodebug__)) functions which must use caller
1432  // location for all instructions in their function body.
1433 
1434  // Don't update static allocas, as they may get moved later.
1435  if (auto *AI = dyn_cast<AllocaInst>(BI))
1436  if (allocaWouldBeStaticInEntry(AI))
1437  continue;
1438 
1439  BI->setDebugLoc(TheCallDL);
1440  }
1441  }
1442 }
1443 
1444 /// Update the block frequencies of the caller after a callee has been inlined.
1445 ///
1446 /// Each block cloned into the caller has its block frequency scaled by the
1447 /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
1448 /// callee's entry block gets the same frequency as the callsite block and the
1449 /// relative frequencies of all cloned blocks remain the same after cloning.
1450 static void updateCallerBFI(BasicBlock *CallSiteBlock,
1451  const ValueToValueMapTy &VMap,
1452  BlockFrequencyInfo *CallerBFI,
1453  BlockFrequencyInfo *CalleeBFI,
1454  const BasicBlock &CalleeEntryBlock) {
1455  SmallPtrSet<BasicBlock *, 16> ClonedBBs;
1456  for (auto const &Entry : VMap) {
1457  if (!isa<BasicBlock>(Entry.first) || !Entry.second)
1458  continue;
1459  auto *OrigBB = cast<BasicBlock>(Entry.first);
1460  auto *ClonedBB = cast<BasicBlock>(Entry.second);
1461  uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
1462  if (!ClonedBBs.insert(ClonedBB).second) {
1463  // Multiple blocks in the callee might get mapped to one cloned block in
1464  // the caller since we prune the callee as we clone it. When that happens,
1465  // we want to use the maximum among the original blocks' frequencies.
1466  uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
1467  if (NewFreq > Freq)
1468  Freq = NewFreq;
1469  }
1470  CallerBFI->setBlockFreq(ClonedBB, Freq);
1471  }
1472  BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
1473  CallerBFI->setBlockFreqAndScale(
1474  EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
1475  ClonedBBs);
1476 }
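// Worked example with invented numbers: if the callee entry block had
// frequency 200 and the call-site block has frequency 50, every cloned block
// is scaled by 50/200, so a cloned loop body at callee frequency 800 ends up
// with caller frequency 200.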
1477 
1478 /// Update the branch metadata for cloned call instructions.
1479 static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
1480  const ProfileCount &CalleeEntryCount,
1481  const Instruction *TheCall,
1482  ProfileSummaryInfo *PSI,
1483  BlockFrequencyInfo *CallerBFI) {
1484  if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
1485  CalleeEntryCount.getCount() < 1)
1486  return;
1487  auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
1488  int64_t CallCount =
1489  std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
1490  CalleeEntryCount.getCount());
1491  updateProfileCallee(Callee, -CallCount, &VMap);
1492 }
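// Worked example with invented numbers: a callee entry count of 1000 and an
// estimated call-site count of 300 give CallCount = min(300, 1000) = 300;
// updateProfileCallee() below then subtracts 300 from the callee's remaining
// entry count and rescales the cloned call sites accordingly.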
1493 
1494 void llvm::updateProfileCallee(
1495  Function *Callee, int64_t entryDelta,
1496  const ValueMap<const Value *, WeakTrackingVH> *VMap) {
1497  auto CalleeCount = Callee->getEntryCount();
1498  if (!CalleeCount.hasValue())
1499  return;
1500 
1501  uint64_t priorEntryCount = CalleeCount.getCount();
1502  uint64_t newEntryCount;
1503 
1504  // Since CallSiteCount is an estimate, it could exceed the original callee
1505  // count; clamp the new count at zero to guard against underflow.
1506  if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
1507  newEntryCount = 0;
1508  else
1509  newEntryCount = priorEntryCount + entryDelta;
1510 
1511  Callee->setEntryCount(newEntryCount);
1512 
1513  // If we are inlining (a VMap was provided), update the cloned call sites.
1514  if (VMap) {
1515  uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
1516  for (auto const &Entry : *VMap)
1517  if (isa<CallInst>(Entry.first))
1518  if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
1519  CI->updateProfWeight(cloneEntryCount, priorEntryCount);
1520  }
1521  for (BasicBlock &BB : *Callee)
1522  // No need to update the callsite if it is pruned during inlining.
1523  if (!VMap || VMap->count(&BB))
1524  for (Instruction &I : BB)
1525  if (CallInst *CI = dyn_cast<CallInst>(&I))
1526  CI->updateProfWeight(newEntryCount, priorEntryCount);
1527 }
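// Worked example with invented numbers: priorEntryCount = 1000 and
// entryDelta = -300 give newEntryCount = 700; cloned calls (found through the
// VMap) are scaled by 300/1000, and the calls left in the callee by 700/1000.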
1528 
1529 /// This function inlines the called function into the basic block of the
1530 /// caller. This returns false if it is not possible to inline this call.
1531 /// The program is still in a well defined state if this occurs though.
1532 ///
1533 /// Note that this only does one level of inlining. For example, if the
1534 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
1535 /// exists in the instruction stream. Similarly this will inline a recursive
1536 /// function by one level.
1537 llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
1538  AAResults *CalleeAAR,
1539  bool InsertLifetime,
1540  Function *ForwardVarArgsTo) {
1541  Instruction *TheCall = CS.getInstruction();
1542  assert(TheCall->getParent() && TheCall->getFunction()
1543  && "Instruction not in function!");
1544 
1545  // FIXME: we don't inline callbr yet.
1546  if (isa<CallBrInst>(TheCall))
1547  return false;
1548 
1549  // If IFI has any state in it, zap it before we fill it in.
1550  IFI.reset();
1551 
1552  Function *CalledFunc = CS.getCalledFunction();
1553  if (!CalledFunc || // Can't inline external function or indirect
1554  CalledFunc->isDeclaration()) // call!
1555  return "external or indirect";
1556 
1557  // The inliner does not know how to inline through calls with operand bundles
1558  // in general ...
1559  if (CS.hasOperandBundles()) {
1560  for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
1561  uint32_t Tag = CS.getOperandBundleAt(i).getTagID();
1562  // ... but it knows how to inline through "deopt" operand bundles ...
1563  if (Tag == LLVMContext::OB_deopt)
1564  continue;
1565  // ... and "funclet" operand bundles.
1566  if (Tag == LLVMContext::OB_funclet)
1567  continue;
1568 
1569  return "unsupported operand bundle";
1570  }
1571  }
1572 
1573  // If the call to the callee cannot throw, set the 'nounwind' flag on any
1574  // calls that we inline.
1575  bool MarkNoUnwind = CS.doesNotThrow();
1576 
1577  BasicBlock *OrigBB = TheCall->getParent();
1578  Function *Caller = OrigBB->getParent();
1579 
1580  // GC poses two hazards to inlining, which only occur when the callee has GC:
1581  // 1. If the caller has no GC, then the callee's GC must be propagated to the
1582  // caller.
1583  // 2. If the caller has a differing GC, it is invalid to inline.
1584  if (CalledFunc->hasGC()) {
1585  if (!Caller->hasGC())
1586  Caller->setGC(CalledFunc->getGC());
1587  else if (CalledFunc->getGC() != Caller->getGC())
1588  return "incompatible GC";
1589  }
1590 
1591  // Get the personality function from the callee if it contains a landing pad.
1592  Constant *CalledPersonality =
1593  CalledFunc->hasPersonalityFn()
1594  ? CalledFunc->getPersonalityFn()->stripPointerCasts()
1595  : nullptr;
1596 
1597  // Find the personality function used by the landing pads of the caller. If it
1598  // exists, then check to see that it matches the personality function used in
1599  // the callee.
1600  Constant *CallerPersonality =
1601  Caller->hasPersonalityFn()
1602  ? Caller->getPersonalityFn()->stripPointerCasts()
1603  : nullptr;
1604  if (CalledPersonality) {
1605  if (!CallerPersonality)
1606  Caller->setPersonalityFn(CalledPersonality);
1607  // If the personality functions match, then we can perform the
1608  // inlining. Otherwise, we can't inline.
1609  // TODO: This isn't 100% true. Some personality functions are proper
1610  // supersets of others and can be used in place of the other.
1611  else if (CalledPersonality != CallerPersonality)
1612  return "incompatible personality";
1613  }
1614 
1615  // We need to figure out which funclet the callsite was in so that we may
1616  // properly nest the callee.
1617  Instruction *CallSiteEHPad = nullptr;
1618  if (CallerPersonality) {
1619  EHPersonality Personality = classifyEHPersonality(CallerPersonality);
1620  if (isScopedEHPersonality(Personality)) {
1621  Optional<OperandBundleUse> ParentFunclet =
1622  CS.getOperandBundle(LLVMContext::OB_funclet);
1623  if (ParentFunclet)
1624  CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
1625 
1626  // OK, the inlining site is legal. What about the target function?
1627 
1628  if (CallSiteEHPad) {
1629  if (Personality == EHPersonality::MSVC_CXX) {
1630  // The MSVC personality cannot tolerate catches getting inlined into
1631  // cleanup funclets.
1632  if (isa<CleanupPadInst>(CallSiteEHPad)) {
1633  // Ok, the call site is within a cleanuppad. Let's check the callee
1634  // for catchpads.
1635  for (const BasicBlock &CalledBB : *CalledFunc) {
1636  if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
1637  return "catch in cleanup funclet";
1638  }
1639  }
1640  } else if (isAsynchronousEHPersonality(Personality)) {
1641  // SEH is even less tolerant; there may not be any sort of exceptional
1642  // funclet in the callee.
1643  for (const BasicBlock &CalledBB : *CalledFunc) {
1644  if (CalledBB.isEHPad())
1645  return "SEH in cleanup funclet";
1646  }
1647  }
1648  }
1649  }
1650  }
1651 
1652  // Determine if we are dealing with a call in an EHPad which does not unwind
1653  // to caller.
1654  bool EHPadForCallUnwindsLocally = false;
1655  if (CallSiteEHPad && CS.isCall()) {
1656  UnwindDestMemoTy FuncletUnwindMap;
1657  Value *CallSiteUnwindDestToken =
1658  getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
1659 
1660  EHPadForCallUnwindsLocally =
1661  CallSiteUnwindDestToken &&
1662  !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
1663  }
1664 
1665  // Get an iterator to the last basic block in the function, which will have
1666  // the new function inlined after it.
1667  Function::iterator LastBlock = --Caller->end();
1668 
1669  // Make sure to capture all of the return instructions from the cloned
1670  // function.
1671  SmallVector<ReturnInst*, 8> Returns;
1672  ClonedCodeInfo InlinedFunctionInfo;
1673  Function::iterator FirstNewBlock;
1674 
1675  { // Scope to destroy VMap after cloning.
1676  ValueToValueMapTy VMap;
1677  // Keep a list of pair (dst, src) to emit byval initializations.
1678  SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
1679 
1680  auto &DL = Caller->getParent()->getDataLayout();
1681 
1682  // Calculate the vector of arguments to pass into the function cloner, which
1683  // matches up the formal to the actual argument values.
1684  CallSite::arg_iterator AI = CS.arg_begin();
1685  unsigned ArgNo = 0;
1686  for (Function::arg_iterator I = CalledFunc->arg_begin(),
1687  E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
1688  Value *ActualArg = *AI;
1689 
1690  // When byval arguments are actually inlined, we need to make the copy implied
1691  // by them explicit. However, we don't do this if the callee is readonly
1692  // or readnone, because the copy would be unneeded: the callee doesn't
1693  // modify the struct.
1694  if (CS.isByValArgument(ArgNo)) {
1695  ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
1696  CalledFunc->getParamAlignment(ArgNo));
1697  if (ActualArg != *AI)
1698  ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
1699  }
1700 
1701  VMap[&*I] = ActualArg;
1702  }
1703 
1704  // Add alignment assumptions if necessary. We do this before the inlined
1705  // instructions are actually cloned into the caller so that we can easily
1706  // check what will be known at the start of the inlined code.
1707  AddAlignmentAssumptions(CS, IFI);
1708 
1709  // We want the inliner to prune the code as it copies. We would LOVE to
1710  // have no dead or constant instructions leftover after inlining occurs
1711  // (which can happen, e.g., because an argument was constant), but we'll be
1712  // happy with whatever the cloner can do.
1713  CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
1714  /*ModuleLevelChanges=*/false, Returns, ".i",
1715  &InlinedFunctionInfo, TheCall);
1716  // Remember the first block that is newly cloned over.
1717  FirstNewBlock = LastBlock; ++FirstNewBlock;
1718 
1719  if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
1720  // Update the BFI of blocks cloned into the caller.
1721  updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
1722  CalledFunc->front());
1723 
1724  updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
1725  IFI.PSI, IFI.CallerBFI);
1726 
1727  // Inject byval arguments initialization.
1728  for (std::pair<Value*, Value*> &Init : ByValInit)
1729  HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
1730  &*FirstNewBlock, IFI);
1731 
1732  Optional<OperandBundleUse> ParentDeopt =
1733  CS.getOperandBundle(LLVMContext::OB_deopt);
1734  if (ParentDeopt) {
1735  SmallVector<OperandBundleDef, 2> OpDefs;
1736 
1737  for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
1738  Instruction *I = dyn_cast_or_null<Instruction>(VH);
1739  if (!I) continue; // instruction was DCE'd or RAUW'ed to undef
1740 
1741  OpDefs.clear();
1742 
1743  CallSite ICS(I);
1744  OpDefs.reserve(ICS.getNumOperandBundles());
1745 
1746  for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
1747  auto ChildOB = ICS.getOperandBundleAt(i);
1748  if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
1749  // If the inlined call has other operand bundles, let them be
1750  OpDefs.emplace_back(ChildOB);
1751  continue;
1752  }
1753 
1754  // It may be useful to separate this logic (of handling operand
1755  // bundles) out to a separate "policy" component if this gets crowded.
1756  // Prepend the parent's deoptimization continuation to the newly
1757  // inlined call's deoptimization continuation.
1758  std::vector<Value *> MergedDeoptArgs;
1759  MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
1760  ChildOB.Inputs.size());
1761 
1762  MergedDeoptArgs.insert(MergedDeoptArgs.end(),
1763  ParentDeopt->Inputs.begin(),
1764  ParentDeopt->Inputs.end());
1765  MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
1766  ChildOB.Inputs.end());
1767 
1768  OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
1769  }
1770 
1771  Instruction *NewI = nullptr;
1772  if (isa<CallInst>(I))
1773  NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
1774  else if (isa<CallBrInst>(I))
1775  NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I);
1776  else
1777  NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
1778 
1779  // Note: the RAUW does the appropriate fixup in VMap, so we need to do
1780  // this even if the call returns void.
1781  I->replaceAllUsesWith(NewI);
1782 
1783  VH = nullptr;
1784  I->eraseFromParent();
1785  }
1786  }
1787 
1788  // Update the callgraph if requested.
1789  if (IFI.CG)
1790  UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
1791 
1792  // For 'nodebug' functions, the associated DISubprogram is always null.
1793  // Conservatively avoid propagating the callsite debug location to
1794  // instructions inlined from a function whose DISubprogram is not null.
1795  fixupLineNumbers(Caller, FirstNewBlock, TheCall,
1796  CalledFunc->getSubprogram() != nullptr);
1797 
1798  // Clone existing noalias metadata if necessary.
1799  CloneAliasScopeMetadata(CS, VMap);
1800 
1801  // Add noalias metadata if necessary.
1802  AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
1803 
1804  // Propagate llvm.mem.parallel_loop_access if necessary.
1805  PropagateParallelLoopAccessMetadata(CS, VMap);
1806 
1807  // Register any cloned assumptions.
1808  if (IFI.GetAssumptionCache)
1809  for (BasicBlock &NewBlock :
1810  make_range(FirstNewBlock->getIterator(), Caller->end()))
1811  for (Instruction &I : NewBlock) {
1812  if (auto *II = dyn_cast<IntrinsicInst>(&I))
1813  if (II->getIntrinsicID() == Intrinsic::assume)
1814  (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II);
1815  }
1816  }
1817 
1818  // If there are any alloca instructions in the block that used to be the entry
1819  // block for the callee, move them to the entry block of the caller. First
1820  // calculate which instruction they should be inserted before. We insert the
1821  // instructions at the end of the current alloca list.
1822  {
1823  BasicBlock::iterator InsertPoint = Caller->begin()->begin();
1824  for (BasicBlock::iterator I = FirstNewBlock->begin(),
1825  E = FirstNewBlock->end(); I != E; ) {
1826  AllocaInst *AI = dyn_cast<AllocaInst>(I++);
1827  if (!AI) continue;
1828 
1829  // If the alloca is now dead, remove it. This often occurs due to code
1830  // specialization.
1831  if (AI->use_empty()) {
1832  AI->eraseFromParent();
1833  continue;
1834  }
1835 
1836  if (!allocaWouldBeStaticInEntry(AI))
1837  continue;
1838 
1839  // Keep track of the static allocas that we inline into the caller.
1840  IFI.StaticAllocas.push_back(AI);
1841 
1842  // Scan for the block of allocas that we can move over, and move them
1843  // all at once.
1844  while (isa<AllocaInst>(I) &&
1845  allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
1846  IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
1847  ++I;
1848  }
1849 
1850  // Transfer all of the allocas over in a block. Using splice means
1851  // that the instructions aren't removed from the symbol table, then
1852  // reinserted.
1853  Caller->getEntryBlock().getInstList().splice(
1854  InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I);
1855  }
1856  // Move any dbg.declares describing the allocas into the entry basic block.
1857  DIBuilder DIB(*Caller->getParent());
1858  for (auto &AI : IFI.StaticAllocas)
1859  replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0);
1860  }
1861 
1862  SmallVector<Value*,4> VarArgsToForward;
1863  SmallVector<AttributeSet, 4> VarArgsAttrs;
1864  for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
1865  i < CS.getNumArgOperands(); i++) {
1866  VarArgsToForward.push_back(CS.getArgOperand(i));
1867  VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i));
1868  }
1869 
1870  bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
1871  if (InlinedFunctionInfo.ContainsCalls) {
1872  CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
1873  if (CallInst *CI = dyn_cast<CallInst>(TheCall))
1874  CallSiteTailKind = CI->getTailCallKind();
1875 
1876  // For inlining purposes, the "notail" marker is the same as no marker.
1877  if (CallSiteTailKind == CallInst::TCK_NoTail)
1878  CallSiteTailKind = CallInst::TCK_None;
1879 
1880  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
1881  ++BB) {
1882  for (auto II = BB->begin(); II != BB->end();) {
1883  Instruction &I = *II++;
1884  CallInst *CI = dyn_cast<CallInst>(&I);
1885  if (!CI)
1886  continue;
1887 
1888  // Forward varargs from inlined call site to calls to the
1889  // ForwardVarArgsTo function, if requested, and to musttail calls.
1890  if (!VarArgsToForward.empty() &&
1891  ((ForwardVarArgsTo &&
1892  CI->getCalledFunction() == ForwardVarArgsTo) ||
1893  CI->isMustTailCall())) {
1894  // Collect attributes for non-vararg parameters.
1895  AttributeList Attrs = CI->getAttributes();
1896  SmallVector<AttributeSet, 8> ArgAttrs;
1897  if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
1898  for (unsigned ArgNo = 0;
1899  ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
1900  ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
1901  }
1902 
1903  // Add VarArg attributes.
1904  ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
1905  Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(),
1906  Attrs.getRetAttributes(), ArgAttrs);
1907  // Add VarArgs to existing parameters.
1908  SmallVector<Value *, 6> Params(CI->arg_operands());
1909  Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
1910  CallInst *NewCI = CallInst::Create(
1911  CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
1912  NewCI->setDebugLoc(CI->getDebugLoc());
1913  NewCI->setAttributes(Attrs);
1914  NewCI->setCallingConv(CI->getCallingConv());
1915  CI->replaceAllUsesWith(NewCI);
1916  CI->eraseFromParent();
1917  CI = NewCI;
1918  }
1919 
1920  if (Function *F = CI->getCalledFunction())
1921  InlinedDeoptimizeCalls |=
1922  F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
1923 
1924  // We need to reduce the strength of any inlined tail calls. For
1925  // musttail, we have to avoid introducing potential unbounded stack
1926  // growth. For example, if functions 'f' and 'g' are mutually recursive
1927  // with musttail, we can inline 'g' into 'f' so long as we preserve
1928  // musttail on the cloned call to 'f'. If either the inlined call site
1929  // or the cloned call site is *not* musttail, the program already has
1930  // one frame of stack growth, so it's safe to remove musttail. Here is
1931  // a table of example transformations:
1932  //
1933  // f -> musttail g -> musttail f ==> f -> musttail f
1934  // f -> musttail g -> tail f ==> f -> tail f
1935  // f -> g -> musttail f ==> f -> f
1936  // f -> g -> tail f ==> f -> f
1937  //
1938  // Inlined notail calls should remain notail calls.
1939  CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
1940  if (ChildTCK != CallInst::TCK_NoTail)
1941  ChildTCK = std::min(CallSiteTailKind, ChildTCK);
1942  CI->setTailCallKind(ChildTCK);
1943  InlinedMustTailCalls |= CI->isMustTailCall();
1944 
1945  // Calls inlined through a 'nounwind' call site should be marked
1946  // 'nounwind'.
1947  if (MarkNoUnwind)
1948  CI->setDoesNotThrow();
1949  }
1950  }
1951  }
1952 
1953  // Leave lifetime markers for the static allocas, scoping them to the
1954  // function we just inlined.
1955  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
1956  IRBuilder<> builder(&FirstNewBlock->front());
1957  for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
1958  AllocaInst *AI = IFI.StaticAllocas[ai];
1959  // Don't mark swifterror allocas. They can't have bitcast uses.
1960  if (AI->isSwiftError())
1961  continue;
1962 
1963  // If the alloca is already scoped to something smaller than the whole
1964  // function then there's no need to add redundant, less accurate markers.
1965  if (hasLifetimeMarkers(AI))
1966  continue;
1967 
1968  // Try to determine the size of the allocation.
1969  ConstantInt *AllocaSize = nullptr;
1970  if (ConstantInt *AIArraySize =
1971  dyn_cast<ConstantInt>(AI->getArraySize())) {
1972  auto &DL = Caller->getParent()->getDataLayout();
1973  Type *AllocaType = AI->getAllocatedType();
1974  uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
1975  uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
1976 
1977  // Don't add markers for zero-sized allocas.
1978  if (AllocaArraySize == 0)
1979  continue;
1980 
1981  // Check that array size doesn't saturate uint64_t and doesn't
1982  // overflow when it's multiplied by type size.
1983  if (AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
1984  std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
1985  AllocaTypeSize) {
1986  AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
1987  AllocaArraySize * AllocaTypeSize);
1988  }
1989  }
1990 
1991  builder.CreateLifetimeStart(AI, AllocaSize);
1992  for (ReturnInst *RI : Returns) {
1993  // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
1994  // call and a return. The return kills all local allocas.
1995  if (InlinedMustTailCalls &&
1996  RI->getParent()->getTerminatingMustTailCall())
1997  continue;
1998  if (InlinedDeoptimizeCalls &&
1999  RI->getParent()->getTerminatingDeoptimizeCall())
2000  continue;
2001  IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
2002  }
2003  }
2004  }
2005 
2006  // If the inlined code contained dynamic alloca instructions, wrap the inlined
2007  // code with llvm.stacksave/llvm.stackrestore intrinsics.
2008  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
2009  Module *M = Caller->getParent();
2010  // Get the two intrinsics we care about.
2011  Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
2012  Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
2013 
2014  // Insert the llvm.stacksave.
2015  CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
2016  .CreateCall(StackSave, {}, "savedstack");
2017 
2018  // Insert a call to llvm.stackrestore before any return instructions in the
2019  // inlined function.
2020  for (ReturnInst *RI : Returns) {
2021  // Don't insert llvm.stackrestore calls between a musttail or deoptimize
2022  // call and a return. The return will restore the stack pointer.
2023  if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
2024  continue;
2025  if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
2026  continue;
2027  IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
2028  }
2029  }
2030 
2031  // If we are inlining for an invoke instruction, we must make sure to rewrite
2032  // any call instructions into invoke instructions. This is sensitive to which
2033  // funclet pads were top-level in the inlinee, so must be done before
2034  // rewriting the "parent pad" links.
2035  if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
2036  BasicBlock *UnwindDest = II->getUnwindDest();
2037  Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
2038  if (isa<LandingPadInst>(FirstNonPHI)) {
2039  HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
2040  } else {
2041  HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
2042  }
2043  }
2044 
2045  // Update the lexical scopes of the new funclets and callsites.
2046  // Anything that had 'none' as its parent is now nested inside the callsite's
2047  // EHPad.
2048 
2049  if (CallSiteEHPad) {
2050  for (Function::iterator BB = FirstNewBlock->getIterator(),
2051  E = Caller->end();
2052  BB != E; ++BB) {
2053  // Add bundle operands to any top-level call sites.
2054  SmallVector<OperandBundleDef, 1> OpBundles;
2055  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
2056  Instruction *I = &*BBI++;
2057  CallSite CS(I);
2058  if (!CS)
2059  continue;
2060 
2061  // Skip call sites which are nounwind intrinsics.
2062  auto *CalledFn =
2063  dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
2064  if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
2065  continue;
2066 
2067  // Skip call sites which already have a "funclet" bundle.
2068  if (CS.getOperandBundle(LLVMContext::OB_funclet))
2069  continue;
2070 
2071  CS.getOperandBundlesAsDefs(OpBundles);
2072  OpBundles.emplace_back("funclet", CallSiteEHPad);
2073 
2074  Instruction *NewInst;
2075  if (CS.isCall())
2076  NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
2077  else if (CS.isCallBr())
2078  NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I);
2079  else
2080  NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
2081  NewInst->takeName(I);
2082  I->replaceAllUsesWith(NewInst);
2083  I->eraseFromParent();
2084 
2085  OpBundles.clear();
2086  }
2087 
2088  // It is problematic if the inlinee has a cleanupret which unwinds to
2089  // caller and we inline it into a call site which doesn't unwind but into
2090  // an EH pad that does. Such an edge must be dynamically unreachable.
2091  // As such, we replace the cleanupret with unreachable.
2092  if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
2093  if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
2094  changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
2095 
2096  Instruction *I = BB->getFirstNonPHI();
2097  if (!I->isEHPad())
2098  continue;
2099 
2100  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
2101  if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
2102  CatchSwitch->setParentPad(CallSiteEHPad);
2103  } else {
2104  auto *FPI = cast<FuncletPadInst>(I);
2105  if (isa<ConstantTokenNone>(FPI->getParentPad()))
2106  FPI->setParentPad(CallSiteEHPad);
2107  }
2108  }
2109  }
2110 
2111  if (InlinedDeoptimizeCalls) {
2112  // We need to at least remove the deoptimizing returns from the Return set,
2113  // so that the control flow from those returns does not get merged into the
2114  // caller (but terminate it instead). If the caller's return type does not
2115  // match the callee's return type, we also need to change the return type of
2116  // the intrinsic.
2117  if (Caller->getReturnType() == TheCall->getType()) {
2118  auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) {
2119  return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
2120  });
2121  Returns.erase(NewEnd, Returns.end());
2122  } else {
2123  SmallVector<ReturnInst *, 8> NormalReturns;
2124  Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
2125  Caller->getParent(), Intrinsic::experimental_deoptimize,
2126  {Caller->getReturnType()});
2127 
2128  for (ReturnInst *RI : Returns) {
2129  CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
2130  if (!DeoptCall) {
2131  NormalReturns.push_back(RI);
2132  continue;
2133  }
2134 
2135  // The calling convention on the deoptimize call itself may be bogus,
2136  // since the code we're inlining may have undefined behavior (and may
2137  // never actually execute at runtime); but all
2138  // @llvm.experimental.deoptimize declarations have to have the same
2139  // calling convention in a well-formed module.
2140  auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
2141  NewDeoptIntrinsic->setCallingConv(CallingConv);
2142  auto *CurBB = RI->getParent();
2143  RI->eraseFromParent();
2144 
2145  SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
2146  DeoptCall->arg_end());
2147 
2148  SmallVector<OperandBundleDef, 1> OpBundles;
2149  DeoptCall->getOperandBundlesAsDefs(OpBundles);
2150  DeoptCall->eraseFromParent();
2151  assert(!OpBundles.empty() &&
2152  "Expected at least the deopt operand bundle");
2153 
2154  IRBuilder<> Builder(CurBB);
2155  CallInst *NewDeoptCall =
2156  Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
2157  NewDeoptCall->setCallingConv(CallingConv);
2158  if (NewDeoptCall->getType()->isVoidTy())
2159  Builder.CreateRetVoid();
2160  else
2161  Builder.CreateRet(NewDeoptCall);
2162  }
2163 
2164  // Leave behind the normal returns so we can merge control flow.
2165  std::swap(Returns, NormalReturns);
2166  }
2167  }
2168 
2169  // Handle any inlined musttail call sites. In order for a new call site to be
2170  // musttail, the source of the clone and the inlined call site must have been
2171  // musttail. Therefore it's safe to return without merging control into the
2172  // phi below.
2173  if (InlinedMustTailCalls) {
2174  // Check if we need to bitcast the result of any musttail calls.
2175  Type *NewRetTy = Caller->getReturnType();
2176  bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
2177 
2178  // Handle the returns preceded by musttail calls separately.
2179  SmallVector<ReturnInst *, 8> NormalReturns;
2180  for (ReturnInst *RI : Returns) {
2181  CallInst *ReturnedMustTail =
2182  RI->getParent()->getTerminatingMustTailCall();
2183  if (!ReturnedMustTail) {
2184  NormalReturns.push_back(RI);
2185  continue;
2186  }
2187  if (!NeedBitCast)
2188  continue;
2189 
2190  // Delete the old return and any preceding bitcast.
2191  BasicBlock *CurBB = RI->getParent();
2192  auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
2193  RI->eraseFromParent();
2194  if (OldCast)
2195  OldCast->eraseFromParent();
2196 
2197  // Insert a new bitcast and return with the right type.
2198  IRBuilder<> Builder(CurBB);
2199  Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
2200  }
2201 
2202  // Leave behind the normal returns so we can merge control flow.
2203  std::swap(Returns, NormalReturns);
2204  }
2205 
2206  // Now that all of the transforms on the inlined code have taken place but
2207  // before we splice the inlined code into the CFG and lose track of which
2208  // blocks were actually inlined, collect the call sites. We only do this if
2209  // call graph updates weren't requested, as those provide value handle based
2210  // tracking of inlined call sites instead.
2211  if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
2212  // Otherwise just collect the raw call sites that were inlined.
2213  for (BasicBlock &NewBB :
2214  make_range(FirstNewBlock->getIterator(), Caller->end()))
2215  for (Instruction &I : NewBB)
2216  if (auto CS = CallSite(&I))
2217  IFI.InlinedCallSites.push_back(CS);
2218  }
2219 
2220  // If we cloned in _exactly one_ basic block, and if that block ends in a
2221  // return instruction, we splice the body of the inlined callee directly into
2222  // the calling basic block.
2223  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
2224  // Move all of the instructions right before the call.
2225  OrigBB->getInstList().splice(TheCall->getIterator(),
2226  FirstNewBlock->getInstList(),
2227  FirstNewBlock->begin(), FirstNewBlock->end());
2228  // Remove the cloned basic block.
2229  Caller->getBasicBlockList().pop_back();
2230 
2231  // If the call site was an invoke instruction, add a branch to the normal
2232  // destination.
2233  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
2234  BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
2235  NewBr->setDebugLoc(Returns[0]->getDebugLoc());
2236  }
2237 
2238  // If the return instruction returned a value, replace uses of the call with
2239  // uses of the returned value.
2240  if (!TheCall->use_empty()) {
2241  ReturnInst *R = Returns[0];
2242  if (TheCall == R->getReturnValue())
2243  TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
2244  else
2245  TheCall->replaceAllUsesWith(R->getReturnValue());
2246  }
2247  // Since we are now done with the Call/Invoke, we can delete it.
2248  TheCall->eraseFromParent();
2249 
2250  // Since we are now done with the return instruction, delete it also.
2251  Returns[0]->eraseFromParent();
2252 
2253  // We are now done with the inlining.
2254  return true;
2255  }
2256 
2257  // Otherwise, we have the normal case, of more than one block to inline or
2258  // multiple return sites.
2259 
2260  // We want to clone the entire callee function into the hole between the
2261  // "starter" and "ender" blocks. How we accomplish this depends on whether
2262  // this is an invoke instruction or a call instruction.
2263  BasicBlock *AfterCallBB;
2264  BranchInst *CreatedBranchToNormalDest = nullptr;
2265  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
2266 
2267  // Add an unconditional branch to make this look like the CallInst case...
2268  CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
2269 
2270  // Split the basic block. This guarantees that no PHI nodes will have to be
2271  // updated due to new incoming edges, and make the invoke case more
2272  // symmetric to the call case.
2273  AfterCallBB =
2274  OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
2275  CalledFunc->getName() + ".exit");
2276 
2277  } else { // It's a call
2278  // If this is a call instruction, we need to split the basic block that
2279  // the call lives in.
2280  //
2281  AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(),
2282  CalledFunc->getName() + ".exit");
2283  }
2284 
2285  if (IFI.CallerBFI) {
2286  // Copy original BB's block frequency to AfterCallBB
2287  IFI.CallerBFI->setBlockFreq(
2288  AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
2289  }
2290 
2291  // Change the branch that used to go to AfterCallBB to branch to the first
2292  // basic block of the inlined function.
2293  //
2294  Instruction *Br = OrigBB->getTerminator();
2295  assert(Br && Br->getOpcode() == Instruction::Br &&
2296  "splitBasicBlock broken!");
2297  Br->setOperand(0, &*FirstNewBlock);
2298 
2299  // Now that the function is correct, make it a little bit nicer. In
2300  // particular, move the basic blocks inserted from the end of the function
2301  // into the space made by splitting the source basic block.
2302  Caller->getBasicBlockList().splice(AfterCallBB->getIterator(),
2303  Caller->getBasicBlockList(), FirstNewBlock,
2304  Caller->end());
2305 
2306  // Handle all of the return instructions that we just cloned in, and eliminate
2307  // any users of the original call/invoke instruction.
2308  Type *RTy = CalledFunc->getReturnType();
2309 
2310  PHINode *PHI = nullptr;
2311  if (Returns.size() > 1) {
2312  // The PHI node should go at the front of the new basic block to merge all
2313  // possible incoming values.
2314  if (!TheCall->use_empty()) {
2315  PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
2316  &AfterCallBB->front());
2317  // Anything that used the result of the function call should now use the
2318  // PHI node as their operand.
2319  TheCall->replaceAllUsesWith(PHI);
2320  }
2321 
2322  // Loop over all of the return instructions adding entries to the PHI node
2323  // as appropriate.
2324  if (PHI) {
2325  for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
2326  ReturnInst *RI = Returns[i];
2327  assert(RI->getReturnValue()->getType() == PHI->getType() &&
2328  "Ret value not consistent in function!");
2329  PHI->addIncoming(RI->getReturnValue(), RI->getParent());
2330  }
2331  }
2332 
2333  // Add a branch to the merge points and remove return instructions.
2334  DebugLoc Loc;
2335  for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
2336  ReturnInst *RI = Returns[i];
2337  BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
2338  Loc = RI->getDebugLoc();
2339  BI->setDebugLoc(Loc);
2340  RI->eraseFromParent();
2341  }
2342  // We need to set the debug location to *somewhere* inside the
2343  // inlined function. The line number may be nonsensical, but the
2344  // instruction will at least be associated with the right
2345  // function.
2346  if (CreatedBranchToNormalDest)
2347  CreatedBranchToNormalDest->setDebugLoc(Loc);
2348  } else if (!Returns.empty()) {
2349  // Otherwise, if there is exactly one return value, just replace anything
2350  // using the return value of the call with the computed value.
2351  if (!TheCall->use_empty()) {
2352  if (TheCall == Returns[0]->getReturnValue())
2353  TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
2354  else
2355  TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
2356  }
2357 
2358  // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
2359  BasicBlock *ReturnBB = Returns[0]->getParent();
2360  ReturnBB->replaceAllUsesWith(AfterCallBB);
2361 
2362  // Splice the code from the return block into the block that it will return
2363  // to, which contains the code that was after the call.
2364  AfterCallBB->getInstList().splice(AfterCallBB->begin(),
2365  ReturnBB->getInstList());
2366 
2367  if (CreatedBranchToNormalDest)
2368  CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
2369 
2370  // Delete the return instruction now and empty ReturnBB now.
2371  Returns[0]->eraseFromParent();
2372  ReturnBB->eraseFromParent();
2373  } else if (!TheCall->use_empty()) {
2374  // No returns, but something is using the return value of the call. Just
2375  // nuke the result.
2376  TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
2377  }
2378 
2379  // Since we are now done with the Call/Invoke, we can delete it.
2380  TheCall->eraseFromParent();
2381 
2382  // If we inlined any musttail calls and the original return is now
2383  // unreachable, delete it. It can only contain a bitcast and ret.
2384  if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
2385  AfterCallBB->eraseFromParent();
2386 
2387  // We should always be able to fold the entry block of the function into the
2388  // single predecessor of the block...
2389  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
2390  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
2391 
2392  // Splice the code entry block into calling block, right before the
2393  // unconditional branch.
2394  CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
2395  OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList());
2396 
2397  // Remove the unconditional branch.
2398  OrigBB->getInstList().erase(Br);
2399 
2400  // Now we can remove the CalleeEntry block, which is now empty.
2401  Caller->getBasicBlockList().erase(CalleeEntry);
2402 
2403  // If we inserted a phi node, check to see if it has a single value (e.g. all
2404  // the entries are the same or undef). If so, remove the PHI so it doesn't
2405  // block other optimizations.
2406  if (PHI) {
2407  AssumptionCache *AC =
2408  IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
2409  auto &DL = Caller->getParent()->getDataLayout();
2410  if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
2411  PHI->replaceAllUsesWith(V);
2412  PHI->eraseFromParent();
2413  }
2414  }
2415 
2416  return true;
2417 }
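// A minimal usage sketch (hedged, simplified): the names GetAssumptionCache,
// PSI, CallerBFI, CalleeBFI, and AAR stand for analyses assumed to be set up
// by the surrounding pass; they are not defined in this file. A transform
// holding a direct call site CS could drive the entry point above roughly
// like this:
//
//   InlineFunctionInfo IFI(/*CG=*/nullptr, &GetAssumptionCache, PSI,
//                          CallerBFI, CalleeBFI);
//   InlineResult Result = InlineFunction(CS, IFI, /*CalleeAAR=*/&AAR,
//                                        /*InsertLifetime=*/true);
//   if (Result)
//     ++NumInlined;  // the call site has been replaced by the callee's body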
bool onlyReadsMemory() const
Determine if the function does not access or only reads memory.
Definition: Function.h:486
static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, LLVMContext &Ctx, DenseMap< const MDNode *, MDNode *> &IANodes)
Returns a DebugLoc for a new DILocation which is a clone of OrigDL inlined at InlinedAt.
Return a value (possibly void), from a function.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static cl::opt< bool > EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining."))
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1181
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, const ProfileCount &CalleeEntryCount, const Instruction *TheCall, ProfileSummaryInfo *PSI, BlockFrequencyInfo *CallerBFI)
Update the branch metadata for cloned call instructions.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
iterator erase(iterator where)
Definition: ilist.h:265
IterTy arg_begin() const
Definition: CallSite.h:584
This class represents lattice values for constants.
Definition: AllocatorList.h:23
CallGraph * CG
If non-null, InlineFunction will update the callgraph to reflect the changes it makes.
Definition: Cloning.h:186
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:66
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:858
iterator end()
Definition: Function.h:687
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
An instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:536
static DebugLoc appendInlinedAt(DebugLoc DL, DILocation *InlinedAt, LLVMContext &Ctx, DenseMap< const MDNode *, MDNode *> &Cache, bool ReplaceLast=false)
Rebuild the entire inlined-at chain for this instruction so that the top of the chain now is inlined-...
Definition: DebugLoc.cpp:82
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
std::function< AssumptionCache &(Function &)> * GetAssumptionCache
Definition: Cloning.h:187
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Notify the BasicBlock that the predecessor Pred is no longer able to reach it.
Definition: BasicBlock.cpp:308
void push_back(const T &Elt)
Definition: SmallVector.h:211
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
unsigned getParamAlignment(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
Definition: Function.h:438
Analysis providing profile information.
const CallInst * getTerminatingMustTailCall() const
Returns the call instruction marked &#39;musttail&#39; prior to the terminating return instruction of this ba...
Definition: BasicBlock.cpp:150
This class represents a function call, abstracting a target machine&#39;s calling convention.
void setGC(std::string Str)
Definition: Function.cpp:483
This file contains the declarations for metadata subclasses.
A cache of @llvm.assume calls within a function.
bool isSwiftError() const
Return true if this alloca is used as a swifterror argument to a call.
Definition: Instructions.h:137
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:743
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI)
If the inlined function has non-byval align arguments, then add .assume-based alignment assumptions t...
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Definition: CallSite.h:595
unsigned getLine() const
Definition: DebugLoc.cpp:25
arg_iterator arg_end()
Definition: Function.h:709
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1100
F(f)
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1212
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:635
An instruction for reading from memory.
Definition: Instructions.h:169
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:111
Optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Definition: CallSite.h:568
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, const DataLayout &DL, AAResults *CalleeAAR)
If the inlined function has noalias arguments, then add new alias scopes for each noalias argument...
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:181
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:699
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
Constant * getClause(unsigned Idx) const
Get the value of the clause at index Idx.
void reserve(size_type N)
Definition: SmallVector.h:369
bool isMustTailCall() const
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock *> IndirectDests, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
A node in the call graph for a module.
Definition: CallGraph.h:164
Tuple of metadata.
Definition: Metadata.h:1105
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall, bool CalleeHasDebugInfo)
Update inlined instructions&#39; line numbers to to encode location where these instructions are inlined...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
Function::ProfileCount ProfileCount
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
constexpr double phi
Definition: MathExtras.h:71
unsigned getAllocaAddrSpace() const
Definition: DataLayout.h:273
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:181
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:1529
iterator end()
Definition: CallGraph.h:190
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, ClonedCodeInfo &InlinedCodeInfo)
If we inlined an invoke site, we need to convert calls in the body of the inlined function into invok...
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:96
TailCallKind getTailCallKind() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:104
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1028
static void updateCallerBFI(BasicBlock *CallSiteBlock, const ValueToValueMapTy &VMap, BlockFrequencyInfo *CallerBFI, BlockFrequencyInfo *CalleeBFI, const BasicBlock &CalleeEntryBlock)
Update the block frequencies of the caller after a callee has been inlined.
ReturnInst * CreateRet(Value *V)
Create a &#39;ret <val>&#39; instruction.
Definition: IRBuilder.h:865
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
AttributeSet getRetAttributes() const
The attributes for the ret value are returned.
static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap)
When inlining a function that contains noalias scope metadata, this metadata needs to be cloned so th...
The only memory references in this function (if it has any) are non-volatile loads from objects point...
void setBlockFreqAndScale(const BasicBlock *ReferenceBB, uint64_t Freq, SmallPtrSetImpl< BasicBlock *> &BlocksToScale)
Set the frequency of ReferenceBB to Freq and scale the frequencies of the blocks in BlocksToScale suc...
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
std::vector< CallRecord > CalledFunctionsVector
Definition: CallGraph.h:171
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch, catchpad/ret, and cleanuppad/ret.
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
void addHandler(BasicBlock *Dest)
Add an entry to the switch instruction...
InstrTy * getInstruction() const
Definition: CallSite.h:96
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:1689
static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, ValueToValueMapTy &VMap, InlineFunctionInfo &IFI)
Once we have cloned code over from a callee into the caller, update the specified callgraph to reflec...
void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl< ReturnInst *> &Returns, const char *NameSuffix="", ClonedCodeInfo *CodeInfo=nullptr, Instruction *TheCall=nullptr)
This works exactly like CloneFunctionInto, except that it does some simple constant prop and DCE on t...
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1963
static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef< BasicBlock *> Preds, BranchInst *BI, bool HasLoopExit)
Update the PHI nodes in OrigBB to include the values coming from NewBB.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
std::vector< WeakTrackingVH > OperandBundleCallSites
All cloned call sites that have operand bundles attached are appended to this vector.
Definition: Cloning.h:77
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
LandingPadInst * getLandingPadInst() const
Get the landingpad instruction from the landing pad block (the unwind destination).
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:454
Value * getCalledOperand() const
Definition: InstrTypes.h:1276
LLVMContext & getContext() const
Definition: Metadata.h:923
FunctionModRefBehavior
Summary of how a function affects memory in the program.
AttributeSet getParamAttributes(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
bool isUsedWithInAlloca() const
Return true if this alloca is used as an inalloca argument to a call.
Definition: Instructions.h:126
iterator find(const KeyT &Val)
Definition: ValueMap.h:156
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
BlockFrequencyInfo * CalleeBFI
Definition: Cloning.h:189
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:244
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
iterator_range< User::op_iterator > arg_operands()
Definition: InstrTypes.h:1233
const std::string & getGC() const
Definition: Function.cpp:478
An instruction for storing to memory.
Definition: Instructions.h:325
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:737
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, OrderedBasicBlock *OBB=nullptr, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: CallSite.h:518
Debug location.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
iterator begin()
Definition: Function.h:685
InlineResult is basically true or false.
Definition: InlineCost.h:135
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1093
Value * getOperand(unsigned i) const
Definition: User.h:169
Class to represent pointers.
Definition: DerivedTypes.h:579
Optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Definition: InstrTypes.h:1814
static BasicBlock * HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge, UnwindDestMemoTy *FuncletUnwindMap=nullptr)
When we inline a basic block into an invoke, we have to turn all of the calls that can throw into inv...
static bool isUsedByLifetimeMarker(Value *V)
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
static cl::opt< bool > PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::init(true), cl::Hidden, cl::desc("Convert align attributes to assumptions during inlining."))
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:141
const BasicBlock & getEntryBlock() const
Definition: Function.h:669
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static TempMDTuple getTemporary(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Return a temporary node.
Definition: Metadata.h:1152
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:216
SmallVector< CallSite, 8 > InlinedCallSites
All of the new call sites inlined into the caller.
Definition: Cloning.h:204
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:168
The landingpad instruction holds all of the information necessary to generate correct exception handl...
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:196
void GetUnderlyingObjects(const Value *V, SmallVectorImpl< const Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:328
uint64_t getCount() const
Definition: Function.h:271
unsigned getNumArgOperands() const
Definition: CallSite.h:303
Value * getCalledValue() const
Definition: InstrTypes.h:1280
bool hasName() const
Definition: Value.h:252
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1522
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
ArrayRef< Use > Inputs
Definition: InstrTypes.h:1001
Resume the propagation of an exception.
static MDNode * inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt, LLVMContext &Ctx, DenseMap< const MDNode *, MDNode *> &IANodes)
Returns the LoopID for a loop which has been cloned from another function for inlining with the n...
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1173
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:285
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:337
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1144
ValTy * getArgument(unsigned ArgNo) const
Definition: CallSite.h:193
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:144
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
ProfileCount getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
Definition: Function.cpp:1547
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:765
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:327
static Value * getUnwindDestTokenHelper(Instruction *EHPad, UnwindDestMemoTy &MemoMap)
Helper for getUnwindDestToken that does the descendant-ward part of the search.
ProfileSummaryInfo * PSI
Definition: Cloning.h:188
void removeCallEdgeFor(CallBase &Call)
Removes the edge in the node for the specified call site.
Definition: CallGraph.cpp:186
static bool hasLifetimeMarkers(AllocaInst *AI)
constexpr double e
Definition: MathExtras.h:57
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1348
The only memory references in this function (if it has any) are non-volatile loads and stores from ob...
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
arg_iterator arg_begin()
Definition: Function.h:700
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
self_iterator getIterator()
Definition: ilist_node.h:81
void setTailCallKind(TailCallKind TCK)
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:59
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1205
MDNode * getScope() const
Definition: DebugLoc.cpp:35
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
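A minimal usage sketch of this entry point, assuming a hypothetical helper in a caller pass; the headers listed and the tryInlineCall name are illustrative, not part of this file:
  #include "llvm/Analysis/InlineCost.h"
  #include "llvm/IR/InstrTypes.h"
  #include "llvm/Transforms/Utils/Cloning.h"

  // Hypothetical helper: attempt to inline one direct call site.
  static bool tryInlineCall(llvm::CallBase *CB) {
    llvm::InlineFunctionInfo IFI;                  // receives InlinedCalls / StaticAllocas
    llvm::InlineResult IR = llvm::InlineFunction(CB, IFI);
    if (!IR)
      return false;                                // call site left untouched on failure
    return true;                                   // IFI now describes the cloned body
  }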
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1446
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
const Constant * stripPointerCasts() const
Definition: Constant.h:183
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:529
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
Class to represent profile counts.
Definition: Function.h:260
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:92
size_t size() const
Definition: SmallVector.h:52
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:224
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Optional< uint64_t > getProfileCount(const Instruction *CallInst, BlockFrequencyInfo *BFI, bool AllowSynthetic=false)
Returns the profile count for CallInst.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1222
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:105
bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
iterator end()
Definition: ValueMap.h:136
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI)
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:338
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
static CatchSwitchInst * Create(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumHandlers, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
iterator end()
Definition: BasicBlock.h:275
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:212
Module.h This file contains the declarations for the Module class.
unsigned getNumOperandBundles() const
Definition: CallSite.h:544
void setBlockFreq(const BasicBlock *BB, uint64_t Freq)
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
static Value * getParentPad(Value *EHPad)
Helper for getUnwindDestToken/getUnwindDestTokenHelper.
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Definition: InstrTypes.h:1849
static cl::opt< bool > NoAliases("riscv-no-aliases", cl::desc("Disable the emission of assembler pseudo instructions"), cl::init(false), cl::Hidden)
static Value * getUnwindDestToken(Instruction *EHPad, UnwindDestMemoTy &MemoMap)
Given an EH pad, find where it unwinds.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
std::string utostr(uint64_t X, bool isNeg=false)
Definition: StringExtras.h:223
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1366
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:163
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
iterator_range< user_iterator > users()
Definition: Value.h:420
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
bool ContainsCalls
This is set to true if the cloned code contains a normal call instruction.
Definition: Cloning.h:67
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1206
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
SmallVector< AllocaInst *, 4 > StaticAllocas
InlineFunction fills this in with all static allocas that get copied into the caller.
Definition: Cloning.h:193
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
bool hasGC() const
hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm to use during code generatio...
Definition: Function.h:354
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:331
void registerAssumption(CallInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
SmallVector< WeakTrackingVH, 8 > InlinedCalls
InlineFunction fills this in with callsites that were inlined from the callee.
Definition: Cloning.h:197
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1344
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1287
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
static MDNode * concatenate(MDNode *A, MDNode *B)
Methods for metadata merging.
Definition: Metadata.cpp:895
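A sketch of the merge pattern used when attaching cloned noalias scopes; NI is the cloned llvm::Instruction* and NewScopes an MDNode* assumed to have been built by the surrounding code:
  // concatenate tolerates a null first argument, so this also works when the cloned
  // instruction carried no !alias.scope metadata of its own.
  NI->setMetadata(llvm::LLVMContext::MD_alias_scope,
                  llvm::MDNode::concatenate(
                      NI->getMetadata(llvm::LLVMContext::MD_alias_scope), NewScopes));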
static void PropagateParallelLoopAccessMetadata(CallSite CS, ValueToValueMapTy &VMap)
When inlining a call site that has !llvm.mem.parallel_loop_access or llvm.access.group metadata...
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:121
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
static Value * HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment)
When inlining a call site that has a byval argument, we have to make the implicit memcpy explicit by ...
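A minimal sketch of the argument walk that reaches this helper; CS is the CallSite being inlined, and the loop body stands in for the explicit copy that HandleByValArgument arranges:
  for (unsigned ArgNo = 0, E = CS.getNumArgOperands(); ArgNo != E; ++ArgNo)
    if (CS.isByValArgument(ArgNo)) {
      // A byval parameter owns a private copy in the callee, so after inlining the
      // caller must materialize that copy (a fresh alloca plus a memcpy) itself.
    }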
iterator end()
Definition: DenseMap.h:82
This struct can be used to capture information about code being cloned, while it is being cloned...
Definition: Cloning.h:65
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1370
unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:267
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:662
uint32_t Size
Definition: Profile.cpp:46
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1161
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:414
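A hedged sketch of how a call site's block is split so that the remainder becomes the continuation block the inlined code falls through to; splitAfterCall is a hypothetical helper name:
  static llvm::BasicBlock *splitAfterCall(llvm::CallBase *CB) {
    llvm::BasicBlock *OrigBB = CB->getParent();
    // Everything after the call moves to the new block; OrigBB now ends in an
    // unconditional branch to it, which the inliner later rewires.
    return OrigBB->splitBasicBlock(std::next(CB->getIterator()), "inline.after_call");
  }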
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
std::vector< CallRecord >::iterator iterator
Definition: CallGraph.h:183
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
void updateProfileCallee(Function *Callee, int64_t entryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding entryDelta then scaling callsite i...
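Sketch of the adjustment made after a successful inline, assuming CallCount is the execution count attributed to the removed call site and VMap is the ValueToValueMapTy produced while cloning:
  // Executions now represented inline in the caller are subtracted from the callee's
  // entry count; VMap lets the cloned call sites inside the caller be rescaled too.
  llvm::updateProfileCallee(Callee, -static_cast<int64_t>(CallCount), &VMap);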
bool isCallBr() const
Return true if a CallBrInst is enclosed.
Definition: CallSite.h:94
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
unsigned getCol() const
Definition: DebugLoc.cpp:30
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI)
Return the result of AI->isStaticAlloca() if AI were moved to the entry block.
BlockFrequencyInfo * CallerBFI
Definition: Cloning.h:189
bool isCall() const
Return true if a CallInst is enclosed.
Definition: CallSite.h:87
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: ValueMap.h:152
bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, DIBuilder &Builder, uint8_t DIExprFlags, int Offset)
Replaces llvm.dbg.declare instruction when the alloca it describes is replaced with a new value...
Definition: Local.cpp:1559
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
Definition: CallSite.h:607
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool ContainsDynamicAllocas
This is set to true if the cloned code contains a 'dynamic' alloca.
Definition: Cloning.h:72
const BasicBlock & front() const
Definition: Function.h:692
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
BasicBlock * getUnwindDest() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:74
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1465
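A hedged sketch of the personality-propagation step performed during inlining; Caller and CalledFunc are the two llvm::Function pointers involved, and the real code additionally verifies that two pre-existing personalities are compatible:
  if (CalledFunc->hasPersonalityFn() && !Caller->hasPersonalityFn())
    Caller->setPersonalityFn(CalledFunc->getPersonalityFn());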
A vector that has set insertion semantics.
Definition: SetVector.h:40
static CleanupReturnInst * Create(Value *CleanupPad, BasicBlock *UnwindBB=nullptr, Instruction *InsertBefore=nullptr)
AttributeSet getFnAttributes() const
The function attributes are returned.
Metadata * get() const
Definition: Metadata.h:721
BasicBlock * changeToInvokeAndSplitBasicBlock(CallInst *CI, BasicBlock *UnwindEdge)
Convert the CallInst to InvokeInst with the specified unwind edge basic block.
Definition: Local.cpp:1974
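Sketch of the per-call conversion that HandleCallsInBlockInlinedThroughInvoke performs, assuming CI is a possibly-throwing CallInst cloned from the callee and UnwindEdge is the caller's landing-pad block:
  // Calls that provably cannot throw keep their form; the rest become invokes whose
  // unwind edge targets the caller's landing pad.
  if (!CI->doesNotThrow())
    llvm::changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);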
Invoke instruction.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:593
iterator begin()
Definition: CallGraph.h:189
OperandBundleUse getOperandBundleAt(unsigned Index) const
Definition: CallSite.h:564
void setPersonalityFn(Constant *Fn)
Definition: Function.cpp:1470
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, ClonedCodeInfo &InlinedCodeInfo)
If we inlined an invoke site, we need to convert calls in the body of the inlined function into invok...
void pop_back()
Definition: ilist.h:316
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
bool isEmpty() const
Return true if there are no attributes.
Definition: Attributes.h:669
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
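A hedged sketch of the post-inline PHI cleanup this utility enables; PN is a PHINode* created while wiring up return values and DL is the caller's DataLayout:
  // If every incoming value folded to the same thing, the PHI disappears.
  if (llvm::Value *V = llvm::SimplifyInstruction(PN, {DL})) {
    PN->replaceAllUsesWith(V);
    PN->eraseFromParent();
  }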
Root of the metadata hierarchy.
Definition: Metadata.h:57
unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:1898
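Sketch, assuming Term is an instruction on an unwind path that inlining proved cannot execute:
  // Term and everything after it in its block are removed and replaced with an
  // 'unreachable' terminator; no llvm.trap call is emitted here.
  llvm::changeToUnreachable(Term, /*UseLLVMTrap=*/false);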
void setDoesNotThrow()
Definition: InstrTypes.h:1690
bool use_empty() const
Definition: Value.h:343
iterator begin()
Definition: ValueMap.h:135
Type * getElementType() const
Definition: DerivedTypes.h:598
ValTy * getArgOperand(unsigned i) const
Definition: CallSite.h:307
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute >> Attrs)
Create an AttributeList with the specified parameters in it.
Definition: Attributes.cpp:972
iterator_range< arg_iterator > args()
Definition: Function.h:724
bool hasOperandBundles() const
Definition: CallSite.h:548
void addCalledFunction(CallBase *Call, CallGraphNode *M)
Adds a function to the list of functions called by this one.
Definition: CallGraph.h:232
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:59
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:542
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:275