File: | lib/Transforms/Utils/InlineFunction.cpp |
Warning: | line 1508, column 12: Value stored to 'newEntryCount' during its initialization is never read |
1 | //===- InlineFunction.cpp - Code to perform function inlining -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements inlining of a function into a call site, resolving |
10 | // parameters and the return value as appropriate. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ADT/DenseMap.h" |
15 | #include "llvm/ADT/None.h" |
16 | #include "llvm/ADT/Optional.h" |
17 | #include "llvm/ADT/STLExtras.h" |
18 | #include "llvm/ADT/SetVector.h" |
19 | #include "llvm/ADT/SmallPtrSet.h" |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/StringExtras.h" |
22 | #include "llvm/ADT/iterator_range.h" |
23 | #include "llvm/Analysis/AliasAnalysis.h" |
24 | #include "llvm/Analysis/AssumptionCache.h" |
25 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
26 | #include "llvm/Analysis/CallGraph.h" |
27 | #include "llvm/Analysis/CaptureTracking.h" |
28 | #include "llvm/Analysis/EHPersonalities.h" |
29 | #include "llvm/Analysis/InstructionSimplify.h" |
30 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
31 | #include "llvm/Transforms/Utils/Local.h" |
32 | #include "llvm/Analysis/ValueTracking.h" |
33 | #include "llvm/Analysis/VectorUtils.h" |
34 | #include "llvm/IR/Argument.h" |
35 | #include "llvm/IR/BasicBlock.h" |
36 | #include "llvm/IR/CFG.h" |
37 | #include "llvm/IR/CallSite.h" |
38 | #include "llvm/IR/Constant.h" |
39 | #include "llvm/IR/Constants.h" |
40 | #include "llvm/IR/DIBuilder.h" |
41 | #include "llvm/IR/DataLayout.h" |
42 | #include "llvm/IR/DebugInfoMetadata.h" |
43 | #include "llvm/IR/DebugLoc.h" |
44 | #include "llvm/IR/DerivedTypes.h" |
45 | #include "llvm/IR/Dominators.h" |
46 | #include "llvm/IR/Function.h" |
47 | #include "llvm/IR/IRBuilder.h" |
48 | #include "llvm/IR/InstrTypes.h" |
49 | #include "llvm/IR/Instruction.h" |
50 | #include "llvm/IR/Instructions.h" |
51 | #include "llvm/IR/IntrinsicInst.h" |
52 | #include "llvm/IR/Intrinsics.h" |
53 | #include "llvm/IR/LLVMContext.h" |
54 | #include "llvm/IR/MDBuilder.h" |
55 | #include "llvm/IR/Metadata.h" |
56 | #include "llvm/IR/Module.h" |
57 | #include "llvm/IR/Type.h" |
58 | #include "llvm/IR/User.h" |
59 | #include "llvm/IR/Value.h" |
60 | #include "llvm/Support/Casting.h" |
61 | #include "llvm/Support/CommandLine.h" |
62 | #include "llvm/Support/ErrorHandling.h" |
63 | #include "llvm/Transforms/Utils/Cloning.h" |
64 | #include "llvm/Transforms/Utils/ValueMapper.h" |
65 | #include <algorithm> |
66 | #include <cassert> |
67 | #include <cstdint> |
68 | #include <iterator> |
69 | #include <limits> |
70 | #include <string> |
71 | #include <utility> |
72 | #include <vector> |
73 | |
74 | using namespace llvm; |
75 | using ProfileCount = Function::ProfileCount; |
76 | |
77 | static cl::opt<bool> |
78 | EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), |
79 | cl::Hidden, |
80 | cl::desc("Convert noalias attributes to metadata during inlining.")); |
81 | |
82 | static cl::opt<bool> |
83 | PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", |
84 | cl::init(true), cl::Hidden, |
85 | cl::desc("Convert align attributes to assumptions during inlining.")); |
86 | |
87 | llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, |
88 | AAResults *CalleeAAR, |
89 | bool InsertLifetime) { |
90 | return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); |
91 | } |
92 | |
93 | llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, |
94 | AAResults *CalleeAAR, |
95 | bool InsertLifetime) { |
96 | return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); |
97 | } |
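| |
| // Illustrative usage (a sketch, not part of the original source; assumes |
| // the caller has already decided the call site is profitable to inline): |
| // |
| //   InlineFunctionInfo IFI; |
| //   InlineResult IR = InlineFunction(CI, IFI, /*CalleeAAR=*/nullptr, |
| //                                    /*InsertLifetime=*/true); |
| //   if (!IR) |
| //     ; // inlining was refused; the call site is left unchanged |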
98 | |
99 | namespace { |
100 | |
101 | /// A class for recording information about inlining a landing pad. |
102 | class LandingPadInliningInfo { |
103 | /// Destination of the invoke's unwind. |
104 | BasicBlock *OuterResumeDest; |
105 | |
106 | /// Destination for the callee's resume. |
107 | BasicBlock *InnerResumeDest = nullptr; |
108 | |
109 | /// LandingPadInst associated with the invoke. |
110 | LandingPadInst *CallerLPad = nullptr; |
111 | |
112 | /// PHI for EH values from landingpad insts. |
113 | PHINode *InnerEHValuesPHI = nullptr; |
114 | |
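| /// Values coming into the unwind destination's PHI nodes from the original |
| /// invoke block, recorded before that edge is removed. |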
115 | SmallVector<Value*, 8> UnwindDestPHIValues; |
116 | |
117 | public: |
118 | LandingPadInliningInfo(InvokeInst *II) |
119 | : OuterResumeDest(II->getUnwindDest()) { |
120 | // If there are PHI nodes in the unwind destination block, we need to keep |
121 | // track of which values came into them from the invoke before removing |
122 | // the edge from this block. |
123 | BasicBlock *InvokeBB = II->getParent(); |
124 | BasicBlock::iterator I = OuterResumeDest->begin(); |
125 | for (; isa<PHINode>(I); ++I) { |
126 | // Save the value to use for this edge. |
127 | PHINode *PHI = cast<PHINode>(I); |
128 | UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); |
129 | } |
130 | |
131 | CallerLPad = cast<LandingPadInst>(I); |
132 | } |
133 | |
134 | /// The outer unwind destination is the target of |
135 | /// unwind edges introduced for calls within the inlined function. |
136 | BasicBlock *getOuterResumeDest() const { |
137 | return OuterResumeDest; |
138 | } |
139 | |
140 | BasicBlock *getInnerResumeDest(); |
141 | |
142 | LandingPadInst *getLandingPadInst() const { return CallerLPad; } |
143 | |
144 | /// Forward the 'resume' instruction to the caller's landing pad block. |
145 | /// When the landing pad block has only one predecessor, this is |
146 | /// a simple branch. When there is more than one predecessor, we need to |
147 | /// split the landing pad block after the landingpad instruction and jump |
148 | /// to there. |
149 | void forwardResume(ResumeInst *RI, |
150 | SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); |
151 | |
152 | /// Add incoming-PHI values to the unwind destination block for the given |
153 | /// basic block, using the values for the original invoke's source block. |
154 | void addIncomingPHIValuesFor(BasicBlock *BB) const { |
155 | addIncomingPHIValuesForInto(BB, OuterResumeDest); |
156 | } |
157 | |
158 | void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { |
159 | BasicBlock::iterator I = dest->begin(); |
160 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
161 | PHINode *phi = cast<PHINode>(I); |
162 | phi->addIncoming(UnwindDestPHIValues[i], src); |
163 | } |
164 | } |
165 | }; |
166 | |
167 | } // end anonymous namespace |
168 | |
169 | /// Get or create a target for the branch from ResumeInsts. |
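| /// |
| /// Sketch of the split (illustrative names and landingpad type): |
| ///   lpad:                                   ; OuterResumeDest |
| ///     %lp = landingpad { i8*, i32 } cleanup |
| ///     ... |
| /// becomes |
| ///   lpad:                                   ; invoke edges still land here |
| ///     %lp = landingpad { i8*, i32 } cleanup |
| ///     br label %lpad.body |
| ///   lpad.body:                              ; InnerResumeDest, the target |
| ///     %eh.lpad-body = phi { i8*, i32 } [ %lp, %lpad ], ...  ; of resumes |
| ///     ... |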
170 | BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { |
171 | if (InnerResumeDest) return InnerResumeDest; |
172 | |
173 | // Split the landing pad. |
174 | BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); |
175 | InnerResumeDest = |
176 | OuterResumeDest->splitBasicBlock(SplitPoint, |
177 | OuterResumeDest->getName() + ".body"); |
178 | |
179 | // The number of incoming edges we expect to the inner landing pad. |
180 | const unsigned PHICapacity = 2; |
181 | |
182 | // Create corresponding new PHIs for all the PHIs in the outer landing pad. |
183 | Instruction *InsertPoint = &InnerResumeDest->front(); |
184 | BasicBlock::iterator I = OuterResumeDest->begin(); |
185 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
186 | PHINode *OuterPHI = cast<PHINode>(I); |
187 | PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, |
188 | OuterPHI->getName() + ".lpad-body", |
189 | InsertPoint); |
190 | OuterPHI->replaceAllUsesWith(InnerPHI); |
191 | InnerPHI->addIncoming(OuterPHI, OuterResumeDest); |
192 | } |
193 | |
194 | // Create a PHI for the exception values. |
195 | InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, |
196 | "eh.lpad-body", InsertPoint); |
197 | CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); |
198 | InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); |
199 | |
200 | // All done. |
201 | return InnerResumeDest; |
202 | } |
203 | |
204 | /// Forward the 'resume' instruction to the caller's landing pad block. |
205 | /// When the landing pad block has only one predecessor, this is a simple |
206 | /// branch. When there is more than one predecessor, we need to split the |
207 | /// landing pad block after the landingpad instruction and jump to there. |
208 | void LandingPadInliningInfo::forwardResume( |
209 | ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { |
210 | BasicBlock *Dest = getInnerResumeDest(); |
211 | BasicBlock *Src = RI->getParent(); |
212 | |
213 | BranchInst::Create(Dest, Src); |
214 | |
215 | // Update the PHIs in the destination. They were inserted in an order which |
216 | // makes this work. |
217 | addIncomingPHIValuesForInto(Src, Dest); |
218 | |
219 | InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); |
220 | RI->eraseFromParent(); |
221 | } |
222 | |
223 | /// Helper for getUnwindDestToken/getUnwindDestTokenHelper. |
224 | static Value *getParentPad(Value *EHPad) { |
225 | if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) |
226 | return FPI->getParentPad(); |
227 | return cast<CatchSwitchInst>(EHPad)->getParentPad(); |
228 | } |
229 | |
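| /// Memo map for getUnwindDestToken (below): maps an EH pad to its resolved |
| /// unwind destination token; a nullptr entry records that no definitive |
| /// answer was found for that pad. |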
230 | using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; |
231 | |
232 | /// Helper for getUnwindDestToken that does the descendant-ward part of |
233 | /// the search. |
234 | static Value *getUnwindDestTokenHelper(Instruction *EHPad, |
235 | UnwindDestMemoTy &MemoMap) { |
236 | SmallVector<Instruction *, 8> Worklist(1, EHPad); |
237 | |
238 | while (!Worklist.empty()) { |
239 | Instruction *CurrentPad = Worklist.pop_back_val(); |
240 | // We only put pads on the worklist that aren't in the MemoMap. When |
241 | // we find an unwind dest for a pad we may update its ancestors, but |
242 | // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, |
243 | // so they should never get updated while queued on the worklist. |
244 | assert(!MemoMap.count(CurrentPad)); |
245 | Value *UnwindDestToken = nullptr; |
246 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { |
247 | if (CatchSwitch->hasUnwindDest()) { |
248 | UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); |
249 | } else { |
250 | // Catchswitch doesn't have a 'nounwind' variant, and one might be |
251 | // annotated as "unwinds to caller" when really it's nounwind (see |
252 | // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the |
253 | // parent's unwind dest from this. We can check its catchpads' |
254 | // descendants, since they might include a cleanuppad with an |
255 | // "unwinds to caller" cleanupret, which can be trusted. |
256 | for (auto HI = CatchSwitch->handler_begin(), |
257 | HE = CatchSwitch->handler_end(); |
258 | HI != HE && !UnwindDestToken; ++HI) { |
259 | BasicBlock *HandlerBlock = *HI; |
260 | auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); |
261 | for (User *Child : CatchPad->users()) { |
262 | // Intentionally ignore invokes here -- since the catchswitch is |
263 | // marked "unwind to caller", it would be a verifier error if it |
264 | // contained an invoke which unwinds out of it, so any invoke we'd |
265 | // encounter must unwind to some child of the catch. |
266 | if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) |
267 | continue; |
268 | |
269 | Instruction *ChildPad = cast<Instruction>(Child); |
270 | auto Memo = MemoMap.find(ChildPad); |
271 | if (Memo == MemoMap.end()) { |
272 | // Haven't figured out this child pad yet; queue it. |
273 | Worklist.push_back(ChildPad); |
274 | continue; |
275 | } |
276 | // We've already checked this child, but might have found that |
277 | // it offers no proof either way. |
278 | Value *ChildUnwindDestToken = Memo->second; |
279 | if (!ChildUnwindDestToken) |
280 | continue; |
281 | // We already know the child's unwind dest, which can either |
282 | // be ConstantTokenNone to indicate unwind to caller, or can |
283 | // be another child of the catchpad. Only the former indicates |
284 | // the unwind dest of the catchswitch. |
285 | if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { |
286 | UnwindDestToken = ChildUnwindDestToken; |
287 | break; |
288 | } |
289 | assert(getParentPad(ChildUnwindDestToken) == CatchPad); |
290 | } |
291 | } |
292 | } |
293 | } else { |
294 | auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); |
295 | for (User *U : CleanupPad->users()) { |
296 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { |
297 | if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) |
298 | UnwindDestToken = RetUnwindDest->getFirstNonPHI(); |
299 | else |
300 | UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); |
301 | break; |
302 | } |
303 | Value *ChildUnwindDestToken; |
304 | if (auto *Invoke = dyn_cast<InvokeInst>(U)) { |
305 | ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); |
306 | } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { |
307 | Instruction *ChildPad = cast<Instruction>(U); |
308 | auto Memo = MemoMap.find(ChildPad); |
309 | if (Memo == MemoMap.end()) { |
310 | // Haven't resolved this child yet; queue it and keep searching. |
311 | Worklist.push_back(ChildPad); |
312 | continue; |
313 | } |
314 | // We've checked this child, but still need to ignore it if it |
315 | // had no proof either way. |
316 | ChildUnwindDestToken = Memo->second; |
317 | if (!ChildUnwindDestToken) |
318 | continue; |
319 | } else { |
320 | // Not a relevant user of the cleanuppad |
321 | continue; |
322 | } |
323 | // In a well-formed program, the child/invoke must either unwind to |
324 | // an(other) child of the cleanup, or exit the cleanup. In the |
325 | // first case, continue searching. |
326 | if (isa<Instruction>(ChildUnwindDestToken) && |
327 | getParentPad(ChildUnwindDestToken) == CleanupPad) |
328 | continue; |
329 | UnwindDestToken = ChildUnwindDestToken; |
330 | break; |
331 | } |
332 | } |
333 | // If we haven't found an unwind dest for CurrentPad, we may have queued its |
334 | // children, so move on to the next in the worklist. |
335 | if (!UnwindDestToken) |
336 | continue; |
337 | |
338 | // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits |
339 | // any ancestors of CurrentPad up to but not including UnwindDestToken's |
340 | // parent pad. Record this in the memo map, and check to see if the |
341 | // original EHPad being queried is one of the ones exited. |
342 | Value *UnwindParent; |
343 | if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) |
344 | UnwindParent = getParentPad(UnwindPad); |
345 | else |
346 | UnwindParent = nullptr; |
347 | bool ExitedOriginalPad = false; |
348 | for (Instruction *ExitedPad = CurrentPad; |
349 | ExitedPad && ExitedPad != UnwindParent; |
350 | ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { |
351 | // Skip over catchpads since they just follow their catchswitches. |
352 | if (isa<CatchPadInst>(ExitedPad)) |
353 | continue; |
354 | MemoMap[ExitedPad] = UnwindDestToken; |
355 | ExitedOriginalPad |= (ExitedPad == EHPad); |
356 | } |
357 | |
358 | if (ExitedOriginalPad) |
359 | return UnwindDestToken; |
360 | |
361 | // Continue the search. |
362 | } |
363 | |
364 | // No definitive information is contained within this funclet. |
365 | return nullptr; |
366 | } |
367 | |
368 | /// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, |
369 | /// return that pad instruction. If it unwinds to caller, return |
370 | /// ConstantTokenNone. If it does not have a definitive unwind destination, |
371 | /// return nullptr. |
372 | /// |
373 | /// This routine gets invoked for calls in funclets in inlinees when inlining |
374 | /// an invoke. Since many funclets don't have calls inside them, it's queried |
375 | /// on-demand rather than building a map of pads to unwind dests up front. |
376 | /// Determining a funclet's unwind dest may require recursively searching its |
377 | /// descendants, and also ancestors and cousins if the descendants don't provide |
378 | /// an answer. Since most funclets will have their unwind dest immediately |
379 | /// available as the unwind dest of a catchswitch or cleanupret, this routine |
380 | /// searches top-down from the given pad and then up. To avoid worst-case |
381 | /// quadratic run-time given that approach, it uses a memo map to avoid |
382 | /// re-processing funclet trees. The callers that rewrite the IR as they go |
383 | /// take advantage of this, for correctness, by checking/forcing rewritten |
384 | /// pads' entries to match the original callee view. |
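| /// |
| /// Illustrative example (IR names invented for this sketch): for |
| ///   %cp = cleanuppad within none [] |
| /// a reachable 'cleanupret from %cp unwind label %next' resolves the query |
| /// to the EH pad that starts %next, and 'cleanupret from %cp unwind to |
| /// caller' resolves it to ConstantTokenNone; if %cp has neither, the search |
| /// descends into its child pads and then climbs to its ancestors, memoizing |
| /// nullptr for every pad that yields no information. |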
385 | static Value *getUnwindDestToken(Instruction *EHPad, |
386 | UnwindDestMemoTy &MemoMap) { |
387 | // Catchpads unwind to the same place as their catchswitch; |
388 | // redirect any queries on catchpads so the code below can |
389 | // deal with just catchswitches and cleanuppads. |
390 | if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) |
391 | EHPad = CPI->getCatchSwitch(); |
392 | |
393 | // Check if we've already determined the unwind dest for this pad. |
394 | auto Memo = MemoMap.find(EHPad); |
395 | if (Memo != MemoMap.end()) |
396 | return Memo->second; |
397 | |
398 | // Search EHPad and, if necessary, its descendants. |
399 | Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); |
400 | assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); |
401 | if (UnwindDestToken) |
402 | return UnwindDestToken; |
403 | |
404 | // No information is available for this EHPad from itself or any of its |
405 | // descendants. An unwind all the way out to a pad in the caller would |
406 | // need also to agree with the unwind dest of the parent funclet, so |
407 | // search up the chain to try to find a funclet with information. Put |
408 | // null entries in the memo map to avoid re-processing as we go up. |
409 | MemoMap[EHPad] = nullptr; |
410 | #ifndef NDEBUG |
411 | SmallPtrSet<Instruction *, 4> TempMemos; |
412 | TempMemos.insert(EHPad); |
413 | #endif |
414 | Instruction *LastUselessPad = EHPad; |
415 | Value *AncestorToken; |
416 | for (AncestorToken = getParentPad(EHPad); |
417 | auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); |
418 | AncestorToken = getParentPad(AncestorToken)) { |
419 | // Skip over catchpads since they just follow their catchswitches. |
420 | if (isa<CatchPadInst>(AncestorPad)) |
421 | continue; |
422 | // If the MemoMap had an entry mapping AncestorPad to nullptr, since we |
423 | // haven't yet called getUnwindDestTokenHelper for AncestorPad in this |
424 | // call to getUnwindDestToken, that would mean that AncestorPad had no |
425 | // information in itself, its descendants, or its ancestors. If that |
426 | // were the case, then we should also have recorded the lack of information |
427 | // for the descendant that we're coming from. So assert that we don't |
428 | // find a null entry in the MemoMap for AncestorPad. |
429 | assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); |
430 | auto AncestorMemo = MemoMap.find(AncestorPad); |
431 | if (AncestorMemo == MemoMap.end()) { |
432 | UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); |
433 | } else { |
434 | UnwindDestToken = AncestorMemo->second; |
435 | } |
436 | if (UnwindDestToken) |
437 | break; |
438 | LastUselessPad = AncestorPad; |
439 | MemoMap[LastUselessPad] = nullptr; |
440 | #ifndef NDEBUG |
441 | TempMemos.insert(LastUselessPad); |
442 | #endif |
443 | } |
444 | |
445 | // We know that getUnwindDestTokenHelper was called on LastUselessPad and |
446 | // returned nullptr (and likewise for EHPad and any of its ancestors up to |
447 | // LastUselessPad), so LastUselessPad has no information from below. Since |
448 | // getUnwindDestTokenHelper must investigate all downward paths through |
449 | // no-information nodes to prove that a node has no information like this, |
450 | // and since any time it finds information it records it in the MemoMap for |
451 | // not just the immediately-containing funclet but also any ancestors also |
452 | // exited, it must be the case that, walking downward from LastUselessPad, |
453 | // visiting just those nodes which have not been mapped to an unwind dest |
454 | // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since |
455 | // they are just used to keep getUnwindDestTokenHelper from repeating work), |
456 | // any node visited must have been exhaustively searched with no information |
457 | // for it found. |
458 | SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); |
459 | while (!Worklist.empty()) { |
460 | Instruction *UselessPad = Worklist.pop_back_val(); |
461 | auto Memo = MemoMap.find(UselessPad); |
462 | if (Memo != MemoMap.end() && Memo->second) { |
463 | // Here the name 'UselessPad' is a bit of a misnomer, because we've found |
464 | // that it is a funclet that does have information about unwinding to |
465 | // a particular destination; its parent was a useless pad. |
466 | // Since its parent has no information, the unwind edge must not escape |
467 | // the parent, and must target a sibling of this pad. This local unwind |
468 | // gives us no information about EHPad. Leave it and the subtree rooted |
469 | // at it alone. |
470 | assert(getParentPad(Memo->second) == getParentPad(UselessPad)); |
471 | continue; |
472 | } |
473 | // We know we don't have information for UselessPad. If it has an entry in |
474 | // the MemoMap (mapping it to nullptr), it must be one of the TempMemos |
475 | // added on this invocation of getUnwindDestToken; if a previous invocation |
476 | // recorded nullptr, it would have had to prove that the ancestors of |
477 | // UselessPad, which include LastUselessPad, had no information, and that |
478 | // in turn would have required proving that the descendants of |
479 | // LastUselessPad, which include EHPad, have no information about |
480 | // LastUselessPad, which would imply that EHPad was mapped to nullptr in |
481 | // the MemoMap on that invocation, which isn't the case if we got here. |
482 | assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); |
483 | // Assert as we enumerate users that 'UselessPad' doesn't have any unwind |
484 | // information that we'd be contradicting by making a map entry for it |
485 | // (which is something that getUnwindDestTokenHelper must have proved for |
486 | // us to get here). Just assert on its direct users here; the checks in |
487 | // this downward walk at its descendants will verify that they don't have |
488 | // any unwind edges that exit 'UselessPad' either (i.e. they either have no |
489 | // unwind edges or unwind to a sibling). |
490 | MemoMap[UselessPad] = UnwindDestToken; |
491 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { |
492 | assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad"); |
493 | for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { |
494 | auto *CatchPad = HandlerBlock->getFirstNonPHI(); |
495 | for (User *U : CatchPad->users()) { |
496 | assert( |
497 | (!isa<InvokeInst>(U) || |
498 | (getParentPad( |
499 | cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == |
500 | CatchPad)) && |
501 | "Expected useless pad"); |
502 | if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) |
503 | Worklist.push_back(cast<Instruction>(U)); |
504 | } |
505 | } |
506 | } else { |
507 | assert(isa<CleanupPadInst>(UselessPad)); |
508 | for (User *U : UselessPad->users()) { |
509 | assert(!isa<CleanupReturnInst>(U) && "Expected useless pad"); |
510 | assert((!isa<InvokeInst>(U) || |
511 | (getParentPad( |
512 | cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == |
513 | UselessPad)) && |
514 | "Expected useless pad"); |
515 | if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) |
516 | Worklist.push_back(cast<Instruction>(U)); |
517 | } |
518 | } |
519 | } |
520 | |
521 | return UnwindDestToken; |
522 | } |
523 | |
524 | /// When we inline a basic block into an invoke, |
525 | /// we have to turn all of the calls that can throw into invokes. |
526 | /// This function analyzes BB to see if there are any calls, and if so, |
527 | /// rewrites them to be invokes that unwind to UnwindEdge. It returns the |
528 | /// block whose call was rewritten (null if none) so callers can update PHIs. |
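| /// |
| /// Conceptually (a sketch; names invented): a throwing call in the inlined |
| /// body such as |
| ///   %r = call i32 @f(i32 %x) |
| /// has its block split after the call and is rewritten to |
| ///   %r = invoke i32 @f(i32 %x) |
| ///           to label %split unwind label %unwind.edge |
| /// so exceptions raised in the inlined code flow to the caller's handler. |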
529 | static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( |
530 | BasicBlock *BB, BasicBlock *UnwindEdge, |
531 | UnwindDestMemoTy *FuncletUnwindMap = nullptr) { |
532 | for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { |
533 | Instruction *I = &*BBI++; |
534 | |
535 | // We only need to check for function calls: inlined invoke |
536 | // instructions require no special handling. |
537 | CallInst *CI = dyn_cast<CallInst>(I); |
538 | |
539 | if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) |
540 | continue; |
541 | |
542 | // We do not need to (and in fact, cannot) convert possibly throwing calls |
543 | // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into |
544 | // invokes. The caller's "segment" of the deoptimization continuation |
545 | // attached to the newly inlined @llvm.experimental.deoptimize |
546 | // (resp. @llvm.experimental.guard) call should contain the exception |
547 | // handling logic, if any. |
548 | if (auto *F = CI->getCalledFunction()) |
549 | if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || |
550 | F->getIntrinsicID() == Intrinsic::experimental_guard) |
551 | continue; |
552 | |
553 | if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { |
554 | // This call is nested inside a funclet. If that funclet has an unwind |
555 | // destination within the inlinee, then unwinding out of this call would |
556 | // be UB. Rewriting this call to an invoke which targets the inlined |
557 | // invoke's unwind dest would give the call's parent funclet multiple |
558 | // unwind destinations, which is something that subsequent EH table |
559 | // generation can't handle and that the verifier rejects. So when we |
560 | // see such a call, leave it as a call. |
561 | auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); |
562 | Value *UnwindDestToken = |
563 | getUnwindDestToken(FuncletPad, *FuncletUnwindMap); |
564 | if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) |
565 | continue; |
566 | #ifndef NDEBUG |
567 | Instruction *MemoKey; |
568 | if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) |
569 | MemoKey = CatchPad->getCatchSwitch(); |
570 | else |
571 | MemoKey = FuncletPad; |
572 | assert(FuncletUnwindMap->count(MemoKey) && |
573 | (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && |
574 | "must get memoized to avoid confusing later searches"); |
575 | #endif // NDEBUG |
576 | } |
577 | |
578 | changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); |
579 | return BB; |
580 | } |
581 | return nullptr; |
582 | } |
583 | |
584 | /// If we inlined an invoke site, we need to convert calls |
585 | /// in the body of the inlined function into invokes. |
586 | /// |
587 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
588 | /// block of the inlined code (the last block is the end of the function), |
589 | /// and InlineCodeInfo is information about the code that got inlined. |
590 | static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
591 | ClonedCodeInfo &InlinedCodeInfo) { |
592 | BasicBlock *InvokeDest = II->getUnwindDest(); |
593 | |
594 | Function *Caller = FirstNewBlock->getParent(); |
595 | |
596 | // The inlined code is currently at the end of the function, scan from the |
597 | // start of the inlined code to its end, checking for stuff we need to |
598 | // rewrite. |
599 | LandingPadInliningInfo Invoke(II); |
600 | |
601 | // Get all of the inlined landing pad instructions. |
602 | SmallPtrSet<LandingPadInst*, 16> InlinedLPads; |
603 | for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); |
604 | I != E; ++I) |
605 | if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) |
606 | InlinedLPads.insert(II->getLandingPadInst()); |
607 | |
608 | // Append the clauses from the outer landing pad instruction into the inlined |
609 | // landing pad instructions. |
610 | LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); |
611 | for (LandingPadInst *InlinedLPad : InlinedLPads) { |
612 | unsigned OuterNum = OuterLPad->getNumClauses(); |
613 | InlinedLPad->reserveClauses(OuterNum); |
614 | for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) |
615 | InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); |
616 | if (OuterLPad->isCleanup()) |
617 | InlinedLPad->setCleanup(true); |
618 | } |
619 | |
620 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
621 | BB != E; ++BB) { |
622 | if (InlinedCodeInfo.ContainsCalls) |
623 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
624 | &*BB, Invoke.getOuterResumeDest())) |
625 | // Update any PHI nodes in the exceptional block to indicate that there |
626 | // is now a new entry in them. |
627 | Invoke.addIncomingPHIValuesFor(NewBB); |
628 | |
629 | // Forward any resumes that are remaining here. |
630 | if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) |
631 | Invoke.forwardResume(RI, InlinedLPads); |
632 | } |
633 | |
634 | // Now that everything is happy, we have one final detail. The PHI nodes in |
635 | // the exception destination block still have entries due to the original |
636 | // invoke instruction. Eliminate these entries (which might even delete the |
637 | // PHI node) now. |
638 | InvokeDest->removePredecessor(II->getParent()); |
639 | } |
640 | |
641 | /// If we inlined an invoke site, we need to convert calls |
642 | /// in the body of the inlined function into invokes. |
643 | /// |
644 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
645 | /// block of the inlined code (the last block is the end of the function), |
646 | /// and InlineCodeInfo is information about the code that got inlined. |
647 | static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
648 | ClonedCodeInfo &InlinedCodeInfo) { |
649 | BasicBlock *UnwindDest = II->getUnwindDest(); |
650 | Function *Caller = FirstNewBlock->getParent(); |
651 | |
652 | assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); |
653 | |
654 | // If there are PHI nodes in the unwind destination block, we need to keep |
655 | // track of which values came into them from the invoke before removing the |
656 | // edge from this block. |
657 | SmallVector<Value *, 8> UnwindDestPHIValues; |
658 | BasicBlock *InvokeBB = II->getParent(); |
659 | for (Instruction &I : *UnwindDest) { |
660 | // Save the value to use for this edge. |
661 | PHINode *PHI = dyn_cast<PHINode>(&I); |
662 | if (!PHI) |
663 | break; |
664 | UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); |
665 | } |
666 | |
667 | // Add incoming-PHI values to the unwind destination block for the given basic |
668 | // block, using the values for the original invoke's source block. |
669 | auto UpdatePHINodes = [&](BasicBlock *Src) { |
670 | BasicBlock::iterator I = UnwindDest->begin(); |
671 | for (Value *V : UnwindDestPHIValues) { |
672 | PHINode *PHI = cast<PHINode>(I); |
673 | PHI->addIncoming(V, Src); |
674 | ++I; |
675 | } |
676 | }; |
677 | |
678 | // This connects all the instructions which 'unwind to caller' to the invoke |
679 | // destination. |
680 | UnwindDestMemoTy FuncletUnwindMap; |
681 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
682 | BB != E; ++BB) { |
683 | if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { |
684 | if (CRI->unwindsToCaller()) { |
685 | auto *CleanupPad = CRI->getCleanupPad(); |
686 | CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI); |
687 | CRI->eraseFromParent(); |
688 | UpdatePHINodes(&*BB); |
689 | // Finding a cleanupret with an unwind destination would confuse |
690 | // subsequent calls to getUnwindDestToken, so map the cleanuppad |
691 | // to short-circuit any such calls and recognize this as an "unwind |
692 | // to caller" cleanup. |
693 | assert(!FuncletUnwindMap.count(CleanupPad) || |
694 | isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); |
695 | FuncletUnwindMap[CleanupPad] = |
696 | ConstantTokenNone::get(Caller->getContext()); |
697 | } |
698 | } |
699 | |
700 | Instruction *I = BB->getFirstNonPHI(); |
701 | if (!I->isEHPad()) |
702 | continue; |
703 | |
704 | Instruction *Replacement = nullptr; |
705 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { |
706 | if (CatchSwitch->unwindsToCaller()) { |
707 | Value *UnwindDestToken; |
708 | if (auto *ParentPad = |
709 | dyn_cast<Instruction>(CatchSwitch->getParentPad())) { |
710 | // This catchswitch is nested inside another funclet. If that |
711 | // funclet has an unwind destination within the inlinee, then |
712 | // unwinding out of this catchswitch would be UB. Rewriting this |
713 | // catchswitch to unwind to the inlined invoke's unwind dest would |
714 | // give the parent funclet multiple unwind destinations, which is |
715 | // something that subsequent EH table generation can't handle and |
716 | // that the verifier rejects. So when we see such a call, leave it |
717 | // as "unwind to caller". |
718 | UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); |
719 | if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) |
720 | continue; |
721 | } else { |
722 | // This catchswitch has no parent to inherit constraints from, and |
723 | // none of its descendants can have an unwind edge that exits it and |
724 | // targets another funclet in the inlinee. It may or may not have a |
725 | // descendant that definitively has an unwind to caller. In either |
726 | // case, we'll have to assume that any unwinds out of it may need to |
727 | // be routed to the caller, so treat it as though it has a definitive |
728 | // unwind to caller. |
729 | UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); |
730 | } |
731 | auto *NewCatchSwitch = CatchSwitchInst::Create( |
732 | CatchSwitch->getParentPad(), UnwindDest, |
733 | CatchSwitch->getNumHandlers(), CatchSwitch->getName(), |
734 | CatchSwitch); |
735 | for (BasicBlock *PadBB : CatchSwitch->handlers()) |
736 | NewCatchSwitch->addHandler(PadBB); |
737 | // Propagate info for the old catchswitch over to the new one in |
738 | // the unwind map. This also serves to short-circuit any subsequent |
739 | // checks for the unwind dest of this catchswitch, which would get |
740 | // confused if they found the outer handler in the callee. |
741 | FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; |
742 | Replacement = NewCatchSwitch; |
743 | } |
744 | } else if (!isa<FuncletPadInst>(I)) { |
745 | llvm_unreachable("unexpected EHPad!")::llvm::llvm_unreachable_internal("unexpected EHPad!", "/build/llvm-toolchain-snapshot-9~svn362543/lib/Transforms/Utils/InlineFunction.cpp" , 745); |
746 | } |
747 | |
748 | if (Replacement) { |
749 | Replacement->takeName(I); |
750 | I->replaceAllUsesWith(Replacement); |
751 | I->eraseFromParent(); |
752 | UpdatePHINodes(&*BB); |
753 | } |
754 | } |
755 | |
756 | if (InlinedCodeInfo.ContainsCalls) |
757 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
758 | E = Caller->end(); |
759 | BB != E; ++BB) |
760 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
761 | &*BB, UnwindDest, &FuncletUnwindMap)) |
762 | // Update any PHI nodes in the exceptional block to indicate that there |
763 | // is now a new entry in them. |
764 | UpdatePHINodes(NewBB); |
765 | |
766 | // Now that everything is happy, we have one final detail. The PHI nodes in |
767 | // the exception destination block still have entries due to the original |
768 | // invoke instruction. Eliminate these entries (which might even delete the |
769 | // PHI node) now. |
770 | UnwindDest->removePredecessor(InvokeBB); |
771 | } |
772 | |
773 | /// When inlining a call site that has !llvm.mem.parallel_loop_access or |
774 | /// !llvm.access.group metadata, that metadata should be propagated to all |
775 | /// memory-accessing cloned instructions. |
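| /// |
| /// For example (sketch): if the call site carries '!llvm.access.group !0', |
| /// a cloned access such as |
| ///   %v = load i32, i32* %p |
| /// becomes |
| ///   %v = load i32, i32* %p, !llvm.access.group !0 |
| /// (united with any access group it already carried), keeping the enclosing |
| /// loop's parallel-loop annotations valid after inlining. |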
776 | static void PropagateParallelLoopAccessMetadata(CallSite CS, |
777 | ValueToValueMapTy &VMap) { |
778 | MDNode *M = |
779 | CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); |
780 | MDNode *CallAccessGroup = |
781 | CS.getInstruction()->getMetadata(LLVMContext::MD_access_group); |
782 | if (!M && !CallAccessGroup) |
783 | return; |
784 | |
785 | for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); |
786 | VMI != VMIE; ++VMI) { |
787 | if (!VMI->second) |
788 | continue; |
789 | |
790 | Instruction *NI = dyn_cast<Instruction>(VMI->second); |
791 | if (!NI) |
792 | continue; |
793 | |
794 | if (M) { |
795 | if (MDNode *PM = |
796 | NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { |
797 | M = MDNode::concatenate(PM, M); |
798 | NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); |
799 | } else if (NI->mayReadOrWriteMemory()) { |
800 | NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); |
801 | } |
802 | } |
803 | |
804 | if (NI->mayReadOrWriteMemory()) { |
805 | MDNode *UnitedAccGroups = uniteAccessGroups( |
806 | NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup); |
807 | NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups); |
808 | } |
809 | } |
810 | } |
811 | |
812 | /// When inlining a function that contains noalias scope metadata, |
813 | /// this metadata needs to be cloned so that the inlined blocks |
814 | /// have different "unique scopes" at every call site. Were this not done, then |
815 | /// aliasing scopes from a function inlined into a caller multiple times could |
816 | /// not be differentiated (and this would lead to miscompiles because the |
817 | /// non-aliasing property communicated by the metadata could have |
818 | /// call-site-specific control dependencies). |
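| /// |
| /// For example (a sketch with invented metadata ids): if the callee marks a |
| /// load '!alias.scope !1' and a store '!noalias !1', inlining it twice |
| /// without cloning would leave both copies sharing !1, letting alias |
| /// analysis pair the load from one copy with the store from the other and |
| /// wrongly conclude they cannot alias; cloning gives each copy fresh scopes. |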
819 | static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { |
820 | const Function *CalledFunc = CS.getCalledFunction(); |
821 | SetVector<const MDNode *> MD; |
822 | |
823 | // Note: We could only clone the metadata if it is already used in the |
824 | // caller. I'm omitting that check here because it might confuse |
825 | // inter-procedural alias analysis passes. We can revisit this if it becomes |
826 | // an efficiency or overhead problem. |
827 | |
828 | for (const BasicBlock &I : *CalledFunc) |
829 | for (const Instruction &J : I) { |
830 | if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) |
831 | MD.insert(M); |
832 | if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) |
833 | MD.insert(M); |
834 | } |
835 | |
836 | if (MD.empty()) |
837 | return; |
838 | |
839 | // Walk the existing metadata, adding the complete (perhaps cyclic) chain to |
840 | // the set. |
841 | SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); |
842 | while (!Queue.empty()) { |
843 | const MDNode *M = cast<MDNode>(Queue.pop_back_val()); |
844 | for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) |
845 | if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i))) |
846 | if (MD.insert(M1)) |
847 | Queue.push_back(M1); |
848 | } |
849 | |
850 | // Now we have a complete set of all metadata in the chains used to specify |
851 | // the noalias scopes and the lists of those scopes. |
852 | SmallVector<TempMDTuple, 16> DummyNodes; |
853 | DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; |
854 | for (const MDNode *I : MD) { |
855 | DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); |
856 | MDMap[I].reset(DummyNodes.back().get()); |
857 | } |
858 | |
859 | // Create new metadata nodes to replace the dummy nodes, replacing old |
860 | // metadata references with either a dummy node or an already-created new |
861 | // node. |
862 | for (const MDNode *I : MD) { |
863 | SmallVector<Metadata *, 4> NewOps; |
864 | for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) { |
865 | const Metadata *V = I->getOperand(i); |
866 | if (const MDNode *M = dyn_cast<MDNode>(V)) |
867 | NewOps.push_back(MDMap[M]); |
868 | else |
869 | NewOps.push_back(const_cast<Metadata *>(V)); |
870 | } |
871 | |
872 | MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); |
873 | MDTuple *TempM = cast<MDTuple>(MDMap[I]); |
874 | assert(TempM->isTemporary() && "Expected temporary node"); |
875 | |
876 | TempM->replaceAllUsesWith(NewM); |
877 | } |
878 | |
879 | // Now replace the metadata in the new inlined instructions with the |
880 | // replacements from the map. |
881 | for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); |
882 | VMI != VMIE; ++VMI) { |
883 | if (!VMI->second) |
884 | continue; |
885 | |
886 | Instruction *NI = dyn_cast<Instruction>(VMI->second); |
887 | if (!NI) |
888 | continue; |
889 | |
890 | if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { |
891 | MDNode *NewMD = MDMap[M]; |
892 | // If the call site also had alias scope metadata (a list of scopes to |
893 | // which instructions inside it might belong), propagate those scopes to |
894 | // the inlined instructions. |
895 | if (MDNode *CSM = |
896 | CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) |
897 | NewMD = MDNode::concatenate(NewMD, CSM); |
898 | NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); |
899 | } else if (NI->mayReadOrWriteMemory()) { |
900 | if (MDNode *M = |
901 | CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) |
902 | NI->setMetadata(LLVMContext::MD_alias_scope, M); |
903 | } |
904 | |
905 | if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { |
906 | MDNode *NewMD = MDMap[M]; |
907 | // If the call site also had noalias metadata (a list of scopes with |
908 | // which instructions inside it don't alias), propagate those scopes to |
909 | // the inlined instructions. |
910 | if (MDNode *CSM = |
911 | CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) |
912 | NewMD = MDNode::concatenate(NewMD, CSM); |
913 | NI->setMetadata(LLVMContext::MD_noalias, NewMD); |
914 | } else if (NI->mayReadOrWriteMemory()) { |
915 | if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) |
916 | NI->setMetadata(LLVMContext::MD_noalias, M); |
917 | } |
918 | } |
919 | } |
920 | |
921 | /// If the inlined function has noalias arguments, |
922 | /// then add new alias scopes for each noalias argument, tag the mapped noalias |
923 | /// parameters with noalias metadata specifying the new scope, and tag all |
924 | /// non-derived loads, stores and memory intrinsics with the new alias scopes. |
925 | static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, |
926 | const DataLayout &DL, AAResults *CalleeAAR) { |
927 | if (!EnableNoAliasConversion) |
928 | return; |
929 | |
930 | const Function *CalledFunc = CS.getCalledFunction(); |
931 | SmallVector<const Argument *, 4> NoAliasArgs; |
932 | |
933 | for (const Argument &Arg : CalledFunc->args()) |
934 | if (Arg.hasNoAliasAttr() && !Arg.use_empty()) |
935 | NoAliasArgs.push_back(&Arg); |
936 | |
937 | if (NoAliasArgs.empty()) |
938 | return; |
939 | |
940 | // To do a good job, if a noalias variable is captured, we need to know if |
941 | // the capture point dominates the particular use we're considering. |
942 | DominatorTree DT; |
943 | DT.recalculate(const_cast<Function&>(*CalledFunc)); |
944 | |
945 | // noalias indicates that pointer values based on the argument do not alias |
946 | // pointer values which are not based on it. So we add a new "scope" for each |
947 | // noalias function argument. Accesses using pointers based on that argument |
948 | // become part of that alias scope, accesses using pointers not based on that |
949 | // argument are tagged as noalias with that scope. |
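| // |
| // For example (sketch): when inlining 'void f(float* noalias %a, float* %b)', |
| // a fresh scope is created for %a; cloned accesses through pointers based on |
| // %a receive '!alias.scope' listing that scope, while cloned accesses |
| // provably not derived from %a receive '!noalias' listing it, so the |
| // argument's noalias contract survives the loss of the call boundary. |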
950 | |
951 | DenseMap<const Argument *, MDNode *> NewScopes; |
952 | MDBuilder MDB(CalledFunc->getContext()); |
953 | |
954 | // Create a new scope domain for this function. |
955 | MDNode *NewDomain = |
956 | MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); |
957 | for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { |
958 | const Argument *A = NoAliasArgs[i]; |
959 | |
960 | std::string Name = CalledFunc->getName(); |
961 | if (A->hasName()) { |
962 | Name += ": %"; |
963 | Name += A->getName(); |
964 | } else { |
965 | Name += ": argument "; |
966 | Name += utostr(i); |
967 | } |
968 | |
969 | // Note: We always create a new anonymous root here. This is true regardless |
970 | // of the linkage of the callee because the aliasing "scope" is not just a |
971 | // property of the callee, but also all control dependencies in the caller. |
972 | MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); |
973 | NewScopes.insert(std::make_pair(A, NewScope)); |
974 | } |
975 | |
976 | // Iterate over all new instructions in the map; for all memory-access |
977 | // instructions, add the alias scope metadata. |
978 | for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); |
979 | VMI != VMIE; ++VMI) { |
980 | if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) { |
981 | if (!VMI->second) |
982 | continue; |
983 | |
984 | Instruction *NI = dyn_cast<Instruction>(VMI->second); |
985 | if (!NI) |
986 | continue; |
987 | |
988 | bool IsArgMemOnlyCall = false, IsFuncCall = false; |
989 | SmallVector<const Value *, 2> PtrArgs; |
990 | |
991 | if (const LoadInst *LI = dyn_cast<LoadInst>(I)) |
992 | PtrArgs.push_back(LI->getPointerOperand()); |
993 | else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) |
994 | PtrArgs.push_back(SI->getPointerOperand()); |
995 | else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) |
996 | PtrArgs.push_back(VAAI->getPointerOperand()); |
997 | else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) |
998 | PtrArgs.push_back(CXI->getPointerOperand()); |
999 | else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) |
1000 | PtrArgs.push_back(RMWI->getPointerOperand()); |
1001 | else if (const auto *Call = dyn_cast<CallBase>(I)) { |
1002 | // If we know that the call does not access memory, then we'll still |
1003 | // know that about the inlined clone of this call site, and we don't |
1004 | // need to add metadata. |
1005 | if (Call->doesNotAccessMemory()) |
1006 | continue; |
1007 | |
1008 | IsFuncCall = true; |
1009 | if (CalleeAAR) { |
1010 | FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call); |
1011 | if (MRB == FMRB_OnlyAccessesArgumentPointees || |
1012 | MRB == FMRB_OnlyReadsArgumentPointees) |
1013 | IsArgMemOnlyCall = true; |
1014 | } |
1015 | |
1016 | for (Value *Arg : Call->args()) { |
1017 | // We need to check the underlying objects of all arguments, not just |
1018 | // the pointer arguments, because we might be passing pointers as |
1019 | // integers, etc. |
1020 | // However, if we know that the call only accesses pointer arguments, |
1021 | // then we only need to check the pointer arguments. |
1022 | if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy()) |
1023 | continue; |
1024 | |
1025 | PtrArgs.push_back(Arg); |
1026 | } |
1027 | } |
1028 | |
1029 | // If we found no pointers, then this instruction is not suitable for |
1030 | // pairing with an instruction to receive aliasing metadata. |
1031 | // However, if this is a call, then we might just alias with none of the
1032 | // noalias arguments.
1033 | if (PtrArgs.empty() && !IsFuncCall) |
1034 | continue; |
1035 | |
1036 | // It is possible that there is only one underlying object, but you |
1037 | // need to go through several PHIs to see it, and thus could be |
1038 | // repeated in the Objects list. |
1039 | SmallPtrSet<const Value *, 4> ObjSet; |
1040 | SmallVector<Metadata *, 4> Scopes, NoAliases; |
1041 | |
1042 | SmallSetVector<const Argument *, 4> NAPtrArgs; |
1043 | for (const Value *V : PtrArgs) { |
1044 | SmallVector<const Value *, 4> Objects; |
1045 | GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr); |
1046 | |
1047 | for (const Value *O : Objects) |
1048 | ObjSet.insert(O); |
1049 | } |
1050 | |
1051 | // Figure out if we're derived from anything that is not a noalias |
1052 | // argument. |
1053 | bool CanDeriveViaCapture = false, UsesAliasingPtr = false; |
1054 | for (const Value *V : ObjSet) { |
1055 | // Is this value a constant that cannot be derived from any pointer
1056 | // value? We need to exclude constant expressions, for example, that
1057 | // are formed from arithmetic on global symbols.
1058 | bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) || |
1059 | isa<ConstantPointerNull>(V) || |
1060 | isa<ConstantDataVector>(V) || isa<UndefValue>(V); |
1061 | if (IsNonPtrConst) |
1062 | continue; |
1063 | |
1064 | // If this is anything other than a noalias argument, then we cannot |
1065 | // completely describe the aliasing properties using alias.scope |
1066 | // metadata (and, thus, won't add any). |
1067 | if (const Argument *A = dyn_cast<Argument>(V)) { |
1068 | if (!A->hasNoAliasAttr()) |
1069 | UsesAliasingPtr = true; |
1070 | } else { |
1071 | UsesAliasingPtr = true; |
1072 | } |
1073 | |
1074 | // If this is not some identified function-local object (which cannot |
1075 | // directly alias a noalias argument), or some other argument (which, |
1076 | // by definition, also cannot alias a noalias argument), then we could |
1077 | // alias a noalias argument that has been captured.
1078 | if (!isa<Argument>(V) && |
1079 | !isIdentifiedFunctionLocal(const_cast<Value*>(V))) |
1080 | CanDeriveViaCapture = true; |
1081 | } |
1082 | |
1083 | // A function call can always get captured noalias pointers (via other |
1084 | // parameters, globals, etc.). |
1085 | if (IsFuncCall && !IsArgMemOnlyCall) |
1086 | CanDeriveViaCapture = true; |
1087 | |
1088 | // First, we want to figure out all of the sets with which we definitely |
1089 | // don't alias. Iterate over all noalias sets, and add those for which:
1090 | // 1. The noalias argument is not in the set of objects from which we |
1091 | // definitely derive. |
1092 | // 2. The noalias argument has not yet been captured. |
1093 | // An arbitrary function that might load pointers could see captured |
1094 | // noalias arguments via other noalias arguments or globals, and so we |
1095 | // must always check for prior capture. |
1096 | for (const Argument *A : NoAliasArgs) { |
1097 | if (!ObjSet.count(A) && (!CanDeriveViaCapture || |
1098 | // It might be tempting to skip the |
1099 | // PointerMayBeCapturedBefore check if |
1100 | // A->hasNoCaptureAttr() is true, but this is |
1101 | // incorrect because nocapture only guarantees |
1102 | // that no copies outlive the function, not |
1103 | // that the value cannot be locally captured. |
1104 | !PointerMayBeCapturedBefore(A, |
1105 | /* ReturnCaptures */ false, |
1106 | /* StoreCaptures */ false, I, &DT))) |
1107 | NoAliases.push_back(NewScopes[A]); |
1108 | } |
1109 | |
1110 | if (!NoAliases.empty()) |
1111 | NI->setMetadata(LLVMContext::MD_noalias, |
1112 | MDNode::concatenate( |
1113 | NI->getMetadata(LLVMContext::MD_noalias), |
1114 | MDNode::get(CalledFunc->getContext(), NoAliases))); |
1115 | |
1116 | // Next, we want to figure out all of the sets to which we might belong. |
1117 | // We might belong to a set if the noalias argument is in the set of |
1118 | // underlying objects. If there is some non-noalias argument in our list |
1119 | // of underlying objects, then we cannot add a scope because the fact |
1120 | // that some access does not alias with any set of our noalias arguments |
1121 | // cannot itself guarantee that it does not alias with this access |
1122 | // (because there is some pointer of unknown origin involved and the |
1123 | // other access might also depend on this pointer). We also cannot add |
1124 | // scopes to arbitrary functions unless we know they don't access any |
1125 | // non-parameter pointer-values. |
1126 | bool CanAddScopes = !UsesAliasingPtr; |
1127 | if (CanAddScopes && IsFuncCall) |
1128 | CanAddScopes = IsArgMemOnlyCall; |
1129 | |
1130 | if (CanAddScopes) |
1131 | for (const Argument *A : NoAliasArgs) { |
1132 | if (ObjSet.count(A)) |
1133 | Scopes.push_back(NewScopes[A]); |
1134 | } |
1135 | |
1136 | if (!Scopes.empty()) |
1137 | NI->setMetadata( |
1138 | LLVMContext::MD_alias_scope, |
1139 | MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), |
1140 | MDNode::get(CalledFunc->getContext(), Scopes))); |
1141 | } |
1142 | } |
1143 | } |
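
The net effect of this function is two metadata kinds on each memory access: an !alias.scope list (scopes the access belongs to) and a !noalias list (scopes it provably does not alias). A small sketch, with an illustrative helper name, of how later code would detect that this information is present:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

// True if the instruction carries either of the two scoped-noalias
// metadata kinds attached by AddAliasScopeMetadata above.
static bool hasScopedNoAliasInfo(const llvm::Instruction &I) {
  return I.getMetadata(llvm::LLVMContext::MD_alias_scope) != nullptr ||
         I.getMetadata(llvm::LLVMContext::MD_noalias) != nullptr;
}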
1144 | |
1145 | /// If the inlined function has non-byval align arguments, then |
1146 | /// add @llvm.assume-based alignment assumptions to preserve this information. |
1147 | static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { |
1148 | if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) |
1149 | return; |
1150 | |
1151 | AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller()); |
1152 | auto &DL = CS.getCaller()->getParent()->getDataLayout(); |
1153 | |
1154 | // To avoid inserting redundant assumptions, we should check for assumptions |
1155 | // already in the caller. To do this, we might need a DT of the caller. |
1156 | DominatorTree DT; |
1157 | bool DTCalculated = false; |
1158 | |
1159 | Function *CalledFunc = CS.getCalledFunction(); |
1160 | for (Argument &Arg : CalledFunc->args()) { |
1161 | unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; |
1162 | if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { |
1163 | if (!DTCalculated) { |
1164 | DT.recalculate(*CS.getCaller()); |
1165 | DTCalculated = true; |
1166 | } |
1167 | |
1168 | // If we can already prove the asserted alignment in the context of the |
1169 | // caller, then don't bother inserting the assumption. |
1170 | Value *ArgVal = CS.getArgument(Arg.getArgNo()); |
1171 | if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align) |
1172 | continue; |
1173 | |
1174 | CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) |
1175 | .CreateAlignmentAssumption(DL, ArgVal, Align); |
1176 | AC->registerAssumption(NewAsmp); |
1177 | } |
1178 | } |
1179 | } |
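
For reference, the assumption emission above boils down to a single IRBuilder call; a minimal sketch with illustrative names, assuming the alignment is a compile-time constant:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

// Emit an @llvm.assume-based alignment assumption for Ptr just before
// InsertPt, as AddAlignmentAssumptions does per pointer argument.
static llvm::CallInst *emitAlignAssume(llvm::Instruction *InsertPt,
                                       llvm::Value *Ptr, unsigned Align) {
  const llvm::DataLayout &DL = InsertPt->getModule()->getDataLayout();
  return llvm::IRBuilder<>(InsertPt).CreateAlignmentAssumption(DL, Ptr, Align);
}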
1180 | |
1181 | /// Once we have cloned code over from a callee into the caller, |
1182 | /// update the specified callgraph to reflect the changes we made. |
1183 | /// Note that it's possible that not all code was copied over, so only |
1184 | /// some edges of the callgraph may remain. |
1185 | static void UpdateCallGraphAfterInlining(CallSite CS, |
1186 | Function::iterator FirstNewBlock, |
1187 | ValueToValueMapTy &VMap, |
1188 | InlineFunctionInfo &IFI) { |
1189 | CallGraph &CG = *IFI.CG; |
1190 | const Function *Caller = CS.getCaller(); |
1191 | const Function *Callee = CS.getCalledFunction(); |
1192 | CallGraphNode *CalleeNode = CG[Callee]; |
1193 | CallGraphNode *CallerNode = CG[Caller]; |
1194 | |
1195 | // Since we inlined some uninlined call sites in the callee into the caller, |
1196 | // add edges from the caller to all of the callees of the callee. |
1197 | CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); |
1198 | |
1199 | // Consider the case where CalleeNode == CallerNode. |
1200 | CallGraphNode::CalledFunctionsVector CallCache; |
1201 | if (CalleeNode == CallerNode) { |
1202 | CallCache.assign(I, E); |
1203 | I = CallCache.begin(); |
1204 | E = CallCache.end(); |
1205 | } |
1206 | |
1207 | for (; I != E; ++I) { |
1208 | const Value *OrigCall = I->first; |
1209 | |
1210 | ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); |
1211 | // Only copy the edge if the call was inlined! |
1212 | if (VMI == VMap.end() || VMI->second == nullptr) |
1213 | continue; |
1214 | |
1215 | // If the call was inlined, but then constant folded, there is no edge to |
1216 | // add. Check for this case. |
1217 | auto *NewCall = dyn_cast<CallBase>(VMI->second); |
1218 | if (!NewCall) |
1219 | continue; |
1220 | |
1221 | // We do not treat intrinsic calls like real function calls because we |
1222 | // expect them to become inline code; do not add an edge for an intrinsic. |
1223 | if (NewCall->getCalledFunction() && |
1224 | NewCall->getCalledFunction()->isIntrinsic()) |
1225 | continue; |
1226 | |
1227 | // Remember that this call site got inlined for the client of |
1228 | // InlineFunction. |
1229 | IFI.InlinedCalls.push_back(NewCall); |
1230 | |
1231 | // It's possible that inlining the callsite will cause it to go from an |
1232 | // indirect to a direct call by resolving a function pointer. If this |
1233 | // happens, set the callee of the new call site to a more precise |
1234 | // destination. This can also happen if the call graph node of the caller |
1235 | // was just unnecessarily imprecise. |
1236 | if (!I->second->getFunction()) |
1237 | if (Function *F = NewCall->getCalledFunction()) { |
1238 | // Indirect call site resolved to direct call. |
1239 | CallerNode->addCalledFunction(NewCall, CG[F]); |
1240 | |
1241 | continue; |
1242 | } |
1243 | |
1244 | CallerNode->addCalledFunction(NewCall, I->second); |
1245 | } |
1246 | |
1247 | // Update the call graph by deleting the edge from Callee to Caller. We must |
1248 | // do this after the loop above in case Caller and Callee are the same. |
1249 | CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction())); |
1250 | } |
1251 | |
1252 | static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, |
1253 | BasicBlock *InsertBlock, |
1254 | InlineFunctionInfo &IFI) { |
1255 | Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); |
1256 | IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); |
1257 | |
1258 | Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); |
1259 | |
1260 | // Always generate a memcpy of alignment 1 here because we don't know |
1261 | // the alignment of the src pointer. Other optimizations can infer |
1262 | // better alignment. |
1263 | Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size); |
1264 | } |
1265 | |
1266 | /// When inlining a call site that has a byval argument, |
1267 | /// we have to make the implicit memcpy explicit by adding it. |
1268 | static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, |
1269 | const Function *CalledFunc, |
1270 | InlineFunctionInfo &IFI, |
1271 | unsigned ByValAlignment) { |
1272 | PointerType *ArgTy = cast<PointerType>(Arg->getType()); |
1273 | Type *AggTy = ArgTy->getElementType(); |
1274 | |
1275 | Function *Caller = TheCall->getFunction(); |
1276 | const DataLayout &DL = Caller->getParent()->getDataLayout(); |
1277 | |
1278 | // If the called function is readonly, then it could not mutate the caller's |
1279 | // copy of the byval'd memory. In this case, it is safe to elide the copy and |
1280 | // temporary. |
1281 | if (CalledFunc->onlyReadsMemory()) { |
1282 | // If the byval argument has a specified alignment that is greater than the |
1283 | // passed in pointer, then we either have to round up the input pointer or |
1284 | // give up on this transformation. |
1285 | if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. |
1286 | return Arg; |
1287 | |
1288 | AssumptionCache *AC = |
1289 | IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; |
1290 | |
1291 | // If the pointer is already known to be sufficiently aligned, or if we can |
1292 | // round it up to a larger alignment, then we don't need a temporary. |
1293 | if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >= |
1294 | ByValAlignment) |
1295 | return Arg; |
1296 | |
1297 | // Otherwise, we have to make a memcpy to get a safe alignment. This is bad |
1298 | // for code quality, but rarely happens and is required for correctness. |
1299 | } |
1300 | |
1301 | // Create the alloca, using the preferred alignment from the DataLayout.
1302 | unsigned Align = DL.getPrefTypeAlignment(AggTy); |
1303 | |
1304 | // If the byval had an alignment specified, we *must* use at least that |
1305 | // alignment, as it is required by the byval argument (and uses of the |
1306 | // pointer inside the callee). |
1307 | Align = std::max(Align, ByValAlignment); |
1308 | |
1309 | Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(), |
1310 | nullptr, Align, Arg->getName(), |
1311 | &*Caller->begin()->begin()); |
1312 | IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); |
1313 | |
1314 | // Uses of the argument in the function should use our new alloca |
1315 | // instead. |
1316 | return NewAlloca; |
1317 | } |
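
The alloca-creation path above reduces to the following; a sketch against the same LLVM 9-era AllocaInst signature, with illustrative names:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include <algorithm>

// Create the entry-block copy for a byval argument: prefer the
// DataLayout's preferred alignment, but never less than the alignment
// the byval attribute itself requires.
static llvm::AllocaInst *makeByValCopy(llvm::Type *AggTy,
                                       const llvm::DataLayout &DL,
                                       unsigned ByValAlign,
                                       llvm::Instruction *InsertPt) {
  unsigned Align = std::max(DL.getPrefTypeAlignment(AggTy), ByValAlign);
  return new llvm::AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr,
                              Align, "byval.copy", InsertPt);
}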
1318 | |
1319 | // Check whether this Value is used by a lifetime intrinsic. |
1320 | static bool isUsedByLifetimeMarker(Value *V) { |
1321 | for (User *U : V->users()) |
1322 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) |
1323 | if (II->isLifetimeStartOrEnd()) |
1324 | return true; |
1325 | return false; |
1326 | } |
1327 | |
1328 | // Check whether the given alloca already has |
1329 | // lifetime.start or lifetime.end intrinsics. |
1330 | static bool hasLifetimeMarkers(AllocaInst *AI) { |
1331 | Type *Ty = AI->getType(); |
1332 | Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), |
1333 | Ty->getPointerAddressSpace()); |
1334 | if (Ty == Int8PtrTy) |
1335 | return isUsedByLifetimeMarker(AI); |
1336 | |
1337 | // Do a scan to find all the casts to i8*. |
1338 | for (User *U : AI->users()) { |
1339 | if (U->getType() != Int8PtrTy) continue; |
1340 | if (U->stripPointerCasts() != AI) continue; |
1341 | if (isUsedByLifetimeMarker(U)) |
1342 | return true; |
1343 | } |
1344 | return false; |
1345 | } |
1346 | |
1347 | /// Return the result of AI->isStaticAlloca() if AI were moved to the entry |
1348 | /// block. Allocas used in inalloca calls and allocas of dynamic array size |
1349 | /// cannot be static. |
1350 | static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1351 | return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca(); |
1352 | } |
1353 | |
1354 | /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL |
1355 | /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. |
1356 | static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, |
1357 | LLVMContext &Ctx, |
1358 | DenseMap<const MDNode *, MDNode *> &IANodes) { |
1359 | auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); |
1360 | return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(), |
1361 | IA); |
1362 | } |
1363 | |
1364 | /// Returns the LoopID for a loop which has been cloned from another
1365 | /// function for inlining with the new inlined-at start and end locs. |
1366 | static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt, |
1367 | LLVMContext &Ctx, |
1368 | DenseMap<const MDNode *, MDNode *> &IANodes) { |
1369 | assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
1370 |        "Loop ID needs at least one operand");
1371 | assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
1372 |        "Loop ID should refer to itself");
1373 | |
1374 | // Save space for the self-referential LoopID. |
1375 | SmallVector<Metadata *, 4> MDs = {nullptr}; |
1376 | |
1377 | for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) { |
1378 | Metadata *MD = OrigLoopId->getOperand(i); |
1379 | // Update the DILocations to encode the inlined-at metadata. |
1380 | if (DILocation *DL = dyn_cast<DILocation>(MD)) |
1381 | MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes)); |
1382 | else |
1383 | MDs.push_back(MD); |
1384 | } |
1385 | |
1386 | MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs); |
1387 | // Insert the self-referential LoopID. |
1388 | NewLoopID->replaceOperandWith(0, NewLoopID); |
1389 | return NewLoopID; |
1390 | } |
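
The self-reference in operand 0 is the standard !llvm.loop idiom; a minimal sketch of just that step, with an illustrative helper name:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

// Create a fresh self-referential loop ID: reserve operand 0, build a
// distinct node, then point operand 0 back at the node itself.
static llvm::MDNode *makeSelfRefLoopID(llvm::LLVMContext &Ctx) {
  llvm::SmallVector<llvm::Metadata *, 4> Ops = {nullptr}; // self slot
  llvm::MDNode *LoopID = llvm::MDNode::getDistinct(Ctx, Ops);
  LoopID->replaceOperandWith(0, LoopID);
  return LoopID;
}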
1391 | |
1392 | /// Update inlined instructions' line numbers to encode the location
1393 | /// where these instructions are inlined.
1394 | static void fixupLineNumbers(Function *Fn, Function::iterator FI, |
1395 | Instruction *TheCall, bool CalleeHasDebugInfo) { |
1396 | const DebugLoc &TheCallDL = TheCall->getDebugLoc(); |
1397 | if (!TheCallDL) |
1398 | return; |
1399 | |
1400 | auto &Ctx = Fn->getContext(); |
1401 | DILocation *InlinedAtNode = TheCallDL; |
1402 | |
1403 | // Create a unique call site, not to be confused with any other call from the |
1404 | // same location. |
1405 | InlinedAtNode = DILocation::getDistinct( |
1406 | Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), |
1407 | InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); |
1408 | |
1409 | // Cache the inlined-at nodes as they're built so they are reused; without
1410 | // this, every instruction's inlined-at chain would become distinct from
1411 | // the others.
1412 | DenseMap<const MDNode *, MDNode *> IANodes; |
1413 | |
1414 | for (; FI != Fn->end(); ++FI) { |
1415 | for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); |
1416 | BI != BE; ++BI) { |
1417 | // Loop metadata needs to be updated so that the start and end locs |
1418 | // reference inlined-at locations. |
1419 | if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) { |
1420 | MDNode *NewLoopID = |
1421 | inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes); |
1422 | BI->setMetadata(LLVMContext::MD_loop, NewLoopID); |
1423 | } |
1424 | |
1425 | if (DebugLoc DL = BI->getDebugLoc()) { |
1426 | DebugLoc IDL = |
1427 | inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes); |
1428 | BI->setDebugLoc(IDL); |
1429 | continue; |
1430 | } |
1431 | |
1432 | if (CalleeHasDebugInfo) |
1433 | continue; |
1434 | |
1435 | // If the inlined instruction has no line number, make it look as if it |
1436 | // originates from the call location. This is important for |
1437 | // __attribute__((__always_inline__, __nodebug__)) functions which must
1438 | // use the caller's location for all instructions in their function body.
1439 | |
1440 | // Don't update static allocas, as they may get moved later. |
1441 | if (auto *AI = dyn_cast<AllocaInst>(BI)) |
1442 | if (allocaWouldBeStaticInEntry(AI)) |
1443 | continue; |
1444 | |
1445 | BI->setDebugLoc(TheCallDL); |
1446 | } |
1447 | } |
1448 | } |
1449 | |
1450 | /// Update the block frequencies of the caller after a callee has been inlined. |
1451 | /// |
1452 | /// Each block cloned into the caller has its block frequency scaled by the |
1453 | /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of |
1454 | /// callee's entry block gets the same frequency as the callsite block and the |
1455 | /// relative frequencies of all cloned blocks remain the same after cloning. |
1456 | static void updateCallerBFI(BasicBlock *CallSiteBlock, |
1457 | const ValueToValueMapTy &VMap, |
1458 | BlockFrequencyInfo *CallerBFI, |
1459 | BlockFrequencyInfo *CalleeBFI, |
1460 | const BasicBlock &CalleeEntryBlock) { |
1461 | SmallPtrSet<BasicBlock *, 16> ClonedBBs; |
1462 | for (auto const &Entry : VMap) { |
1463 | if (!isa<BasicBlock>(Entry.first) || !Entry.second) |
1464 | continue; |
1465 | auto *OrigBB = cast<BasicBlock>(Entry.first); |
1466 | auto *ClonedBB = cast<BasicBlock>(Entry.second); |
1467 | uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); |
1468 | if (!ClonedBBs.insert(ClonedBB).second) { |
1469 | // Multiple blocks in the callee might get mapped to one cloned block in |
1470 | // the caller since we prune the callee as we clone it. When that happens, |
1471 | // we want to use the maximum among the original blocks' frequencies. |
1472 | uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); |
1473 | if (NewFreq > Freq) |
1474 | Freq = NewFreq; |
1475 | } |
1476 | CallerBFI->setBlockFreq(ClonedBB, Freq); |
1477 | } |
1478 | BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); |
1479 | CallerBFI->setBlockFreqAndScale( |
1480 | EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), |
1481 | ClonedBBs); |
1482 | } |
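
The scaling rule from the doc comment, in arithmetic form; a sketch with illustrative numbers describing the effect setBlockFreqAndScale achieves, not a quote of its implementation:

#include <cstdint>

// Cloned blocks keep their callee-relative frequency ratio, re-anchored
// at the call site's frequency. E.g. a callee block at 250 out of an
// entry frequency of 1000 (ratio 0.25), inlined at a call site running
// 80 times, is assigned 80 * 250 / 1000 = 20. Overflow handling omitted.
static uint64_t scaleClonedFreq(uint64_t CalleeBlockFreq,
                                uint64_t CalleeEntryFreq,
                                uint64_t CallSiteFreq) {
  return CallSiteFreq * CalleeBlockFreq / CalleeEntryFreq;
}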
1483 | |
1484 | /// Update the branch metadata for cloned call instructions. |
1485 | static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, |
1486 | const ProfileCount &CalleeEntryCount, |
1487 | const Instruction *TheCall, |
1488 | ProfileSummaryInfo *PSI, |
1489 | BlockFrequencyInfo *CallerBFI) { |
1490 | if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || |
1491 | CalleeEntryCount.getCount() < 1) |
1492 | return; |
1493 | auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; |
1494 | int64_t CallCount = |
1495 | std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, |
1496 | CalleeEntryCount.getCount()); |
1497 | updateProfileCallee(Callee, -CallCount, &VMap); |
1498 | } |
1499 | |
1500 | void llvm::updateProfileCallee( |
1501 | Function *Callee, int64_t entryDelta, |
1502 | const ValueMap<const Value *, WeakTrackingVH> *VMap) { |
1503 | auto CalleeCount = Callee->getEntryCount(); |
1504 | if (!CalleeCount.hasValue()) |
1505 | return; |
1506 | |
1507 | uint64_t priorEntryCount = CalleeCount.getCount(); |
1508 | uint64_t newEntryCount = priorEntryCount; |
Value stored to 'newEntryCount' during its initialization is never read | |
1509 | |
1510 | // Since CallSiteCount is an estimate, it could exceed the original callee
1511 | // count; clamp the new count to 0 to guard against underflow.
1512 | if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount) |
1513 | newEntryCount = 0; |
1514 | else |
1515 | newEntryCount = priorEntryCount + entryDelta; |
1516 | |
1517 | Callee->setEntryCount(newEntryCount); |
1518 | |
1519 | // If called during inlining, scale the weights of the cloned call sites.
1520 | if (VMap) { |
1521 | uint64_t cloneEntryCount = priorEntryCount - newEntryCount; |
1522 | for (auto const &Entry : *VMap) |
1523 | if (isa<CallInst>(Entry.first)) |
1524 | if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) |
1525 | CI->updateProfWeight(cloneEntryCount, priorEntryCount); |
1526 | } |
1527 | for (BasicBlock &BB : *Callee) |
1528 | // No need to update the callsite if it is pruned during inlining. |
1529 | if (!VMap || VMap->count(&BB)) |
1530 | for (Instruction &I : BB) |
1531 | if (CallInst *CI = dyn_cast<CallInst>(&I)) |
1532 | CI->updateProfWeight(newEntryCount, priorEntryCount); |
1533 | } |
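
This is the function the analyzer flags: both branches of the if/else assign newEntryCount, so the initializer at line 1508 is indeed never read. One possible cleanup, sketched here as an isolated helper rather than a committed fix:

#include <cstdint>

// The flagged lines, reshaped so the dead initializer disappears: the
// variable is declared uninitialized because every path assigns it.
static uint64_t adjustedEntryCount(uint64_t priorEntryCount,
                                   int64_t entryDelta) {
  uint64_t newEntryCount;
  if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
    newEntryCount = 0; // clamp: the estimate exceeded the recorded count
  else
    newEntryCount = priorEntryCount + entryDelta;
  return newEntryCount;
}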
1534 | |
1535 | /// This function inlines the called function into the basic block of the |
1536 | /// caller. This returns false if it is not possible to inline this call. |
1537 | /// The program is still in a well-defined state if this occurs, though.
1538 | /// |
1539 | /// Note that this only does one level of inlining. For example, if the |
1540 | /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now |
1541 | /// exists in the instruction stream. Similarly this will inline a recursive |
1542 | /// function by one level. |
1543 | llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, |
1544 | AAResults *CalleeAAR, |
1545 | bool InsertLifetime, |
1546 | Function *ForwardVarArgsTo) { |
1547 | Instruction *TheCall = CS.getInstruction(); |
1548 | assert(TheCall->getParent() && TheCall->getFunction()
1549 |        && "Instruction not in function!");
1550 | |
1551 | // FIXME: we don't inline callbr yet. |
1552 | if (isa<CallBrInst>(TheCall)) |
1553 | return false; |
1554 | |
1555 | // If IFI has any state in it, zap it before we fill it in. |
1556 | IFI.reset(); |
1557 | |
1558 | Function *CalledFunc = CS.getCalledFunction(); |
1559 | if (!CalledFunc || // Can't inline external function or indirect |
1560 | CalledFunc->isDeclaration()) // call! |
1561 | return "external or indirect"; |
1562 | |
1563 | // The inliner does not know how to inline through calls with operand bundles |
1564 | // in general ... |
1565 | if (CS.hasOperandBundles()) { |
1566 | for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { |
1567 | uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); |
1568 | // ... but it knows how to inline through "deopt" operand bundles ... |
1569 | if (Tag == LLVMContext::OB_deopt) |
1570 | continue; |
1571 | // ... and "funclet" operand bundles. |
1572 | if (Tag == LLVMContext::OB_funclet) |
1573 | continue; |
1574 | |
1575 | return "unsupported operand bundle"; |
1576 | } |
1577 | } |
1578 | |
1579 | // If the call to the callee cannot throw, set the 'nounwind' flag on any |
1580 | // calls that we inline. |
1581 | bool MarkNoUnwind = CS.doesNotThrow(); |
1582 | |
1583 | BasicBlock *OrigBB = TheCall->getParent(); |
1584 | Function *Caller = OrigBB->getParent(); |
1585 | |
1586 | // GC poses two hazards to inlining, which only occur when the callee has GC: |
1587 | // 1. If the caller has no GC, then the callee's GC must be propagated to the |
1588 | // caller. |
1589 | // 2. If the caller has a differing GC, it is invalid to inline. |
1590 | if (CalledFunc->hasGC()) { |
1591 | if (!Caller->hasGC()) |
1592 | Caller->setGC(CalledFunc->getGC()); |
1593 | else if (CalledFunc->getGC() != Caller->getGC()) |
1594 | return "incompatible GC"; |
1595 | } |
1596 | |
1597 | // Get the personality function from the callee if it contains a landing pad. |
1598 | Constant *CalledPersonality = |
1599 | CalledFunc->hasPersonalityFn() |
1600 | ? CalledFunc->getPersonalityFn()->stripPointerCasts() |
1601 | : nullptr; |
1602 | |
1603 | // Find the personality function used by the landing pads of the caller. If it |
1604 | // exists, then check to see that it matches the personality function used in |
1605 | // the callee. |
1606 | Constant *CallerPersonality = |
1607 | Caller->hasPersonalityFn() |
1608 | ? Caller->getPersonalityFn()->stripPointerCasts() |
1609 | : nullptr; |
1610 | if (CalledPersonality) { |
1611 | if (!CallerPersonality) |
1612 | Caller->setPersonalityFn(CalledPersonality); |
1613 | // If the personality functions match, then we can perform the |
1614 | // inlining. Otherwise, we can't inline. |
1615 | // TODO: This isn't 100% true. Some personality functions are proper |
1616 | // supersets of others and can be used in place of the other. |
1617 | else if (CalledPersonality != CallerPersonality) |
1618 | return "incompatible personality"; |
1619 | } |
1620 | |
1621 | // We need to figure out which funclet the callsite was in so that we may |
1622 | // properly nest the callee. |
1623 | Instruction *CallSiteEHPad = nullptr; |
1624 | if (CallerPersonality) { |
1625 | EHPersonality Personality = classifyEHPersonality(CallerPersonality); |
1626 | if (isScopedEHPersonality(Personality)) { |
1627 | Optional<OperandBundleUse> ParentFunclet = |
1628 | CS.getOperandBundle(LLVMContext::OB_funclet); |
1629 | if (ParentFunclet) |
1630 | CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); |
1631 | |
1632 | // OK, the inlining site is legal. What about the target function? |
1633 | |
1634 | if (CallSiteEHPad) { |
1635 | if (Personality == EHPersonality::MSVC_CXX) { |
1636 | // The MSVC personality cannot tolerate catches getting inlined into |
1637 | // cleanup funclets. |
1638 | if (isa<CleanupPadInst>(CallSiteEHPad)) { |
1639 | // Ok, the call site is within a cleanuppad. Let's check the callee |
1640 | // for catchpads. |
1641 | for (const BasicBlock &CalledBB : *CalledFunc) { |
1642 | if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI())) |
1643 | return "catch in cleanup funclet"; |
1644 | } |
1645 | } |
1646 | } else if (isAsynchronousEHPersonality(Personality)) { |
1647 | // SEH is even less tolerant; there may not be any sort of exceptional
1648 | // funclet in the callee.
1649 | for (const BasicBlock &CalledBB : *CalledFunc) { |
1650 | if (CalledBB.isEHPad()) |
1651 | return "SEH in cleanup funclet"; |
1652 | } |
1653 | } |
1654 | } |
1655 | } |
1656 | } |
1657 | |
1658 | // Determine if we are dealing with a call in an EHPad which does not unwind |
1659 | // to caller. |
1660 | bool EHPadForCallUnwindsLocally = false; |
1661 | if (CallSiteEHPad && CS.isCall()) { |
1662 | UnwindDestMemoTy FuncletUnwindMap; |
1663 | Value *CallSiteUnwindDestToken = |
1664 | getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); |
1665 | |
1666 | EHPadForCallUnwindsLocally = |
1667 | CallSiteUnwindDestToken && |
1668 | !isa<ConstantTokenNone>(CallSiteUnwindDestToken); |
1669 | } |
1670 | |
1671 | // Get an iterator to the last basic block in the function, which will have |
1672 | // the new function inlined after it. |
1673 | Function::iterator LastBlock = --Caller->end(); |
1674 | |
1675 | // Make sure to capture all of the return instructions from the cloned |
1676 | // function. |
1677 | SmallVector<ReturnInst*, 8> Returns; |
1678 | ClonedCodeInfo InlinedFunctionInfo; |
1679 | Function::iterator FirstNewBlock; |
1680 | |
1681 | { // Scope to destroy VMap after cloning. |
1682 | ValueToValueMapTy VMap; |
1683 | // Keep a list of pair (dst, src) to emit byval initializations. |
1684 | SmallVector<std::pair<Value*, Value*>, 4> ByValInit; |
1685 | |
1686 | auto &DL = Caller->getParent()->getDataLayout(); |
1687 | |
1688 | // Calculate the vector of arguments to pass into the function cloner, which |
1689 | // matches up the formal to the actual argument values. |
1690 | CallSite::arg_iterator AI = CS.arg_begin(); |
1691 | unsigned ArgNo = 0; |
1692 | for (Function::arg_iterator I = CalledFunc->arg_begin(), |
1693 | E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { |
1694 | Value *ActualArg = *AI; |
1695 | |
1696 | // When byval arguments are actually inlined, we need to make the copy
1697 | // implied by them explicit. However, we don't do this if the callee is
1698 | // readonly or readnone, because the copy would be unneeded: the callee
1699 | // doesn't modify the struct.
1700 | if (CS.isByValArgument(ArgNo)) { |
1701 | ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, |
1702 | CalledFunc->getParamAlignment(ArgNo)); |
1703 | if (ActualArg != *AI) |
1704 | ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); |
1705 | } |
1706 | |
1707 | VMap[&*I] = ActualArg; |
1708 | } |
1709 | |
1710 | // Add alignment assumptions if necessary. We do this before the inlined |
1711 | // instructions are actually cloned into the caller so that we can easily |
1712 | // check what will be known at the start of the inlined code. |
1713 | AddAlignmentAssumptions(CS, IFI); |
1714 | |
1715 | // We want the inliner to prune the code as it copies. We would LOVE to |
1716 | // have no dead or constant instructions leftover after inlining occurs |
1717 | // (which can happen, e.g., because an argument was constant), but we'll be |
1718 | // happy with whatever the cloner can do. |
1719 | CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, |
1720 | /*ModuleLevelChanges=*/false, Returns, ".i", |
1721 | &InlinedFunctionInfo, TheCall); |
1722 | // Remember the first block that is newly cloned over. |
1723 | FirstNewBlock = LastBlock; ++FirstNewBlock; |
1724 | |
1725 | if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) |
1726 | // Update the BFI of blocks cloned into the caller. |
1727 | updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, |
1728 | CalledFunc->front()); |
1729 | |
1730 | updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, |
1731 | IFI.PSI, IFI.CallerBFI); |
1732 | |
1733 | // Inject byval arguments initialization. |
1734 | for (std::pair<Value*, Value*> &Init : ByValInit) |
1735 | HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), |
1736 | &*FirstNewBlock, IFI); |
1737 | |
1738 | Optional<OperandBundleUse> ParentDeopt = |
1739 | CS.getOperandBundle(LLVMContext::OB_deopt); |
1740 | if (ParentDeopt) { |
1741 | SmallVector<OperandBundleDef, 2> OpDefs; |
1742 | |
1743 | for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { |
1744 | Instruction *I = dyn_cast_or_null<Instruction>(VH); |
1745 | if (!I) continue; // instruction was DCE'd or RAUW'ed to undef |
1746 | |
1747 | OpDefs.clear(); |
1748 | |
1749 | CallSite ICS(I); |
1750 | OpDefs.reserve(ICS.getNumOperandBundles()); |
1751 | |
1752 | for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { |
1753 | auto ChildOB = ICS.getOperandBundleAt(i); |
1754 | if (ChildOB.getTagID() != LLVMContext::OB_deopt) { |
1755 | // If the inlined call has other operand bundles, let them be |
1756 | OpDefs.emplace_back(ChildOB); |
1757 | continue; |
1758 | } |
1759 | |
1760 | // It may be useful to separate this logic (of handling operand |
1761 | // bundles) out to a separate "policy" component if this gets crowded. |
1762 | // Prepend the parent's deoptimization continuation to the newly |
1763 | // inlined call's deoptimization continuation. |
1764 | std::vector<Value *> MergedDeoptArgs; |
1765 | MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + |
1766 | ChildOB.Inputs.size()); |
1767 | |
1768 | MergedDeoptArgs.insert(MergedDeoptArgs.end(), |
1769 | ParentDeopt->Inputs.begin(), |
1770 | ParentDeopt->Inputs.end()); |
1771 | MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), |
1772 | ChildOB.Inputs.end()); |
1773 | |
1774 | OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); |
1775 | } |
1776 | |
1777 | Instruction *NewI = nullptr; |
1778 | if (isa<CallInst>(I)) |
1779 | NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); |
1780 | else if (isa<CallBrInst>(I)) |
1781 | NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I); |
1782 | else |
1783 | NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); |
1784 | |
1785 | // Note: the RAUW does the appropriate fixup in VMap, so we need to do |
1786 | // this even if the call returns void. |
1787 | I->replaceAllUsesWith(NewI); |
1788 | |
1789 | VH = nullptr; |
1790 | I->eraseFromParent(); |
1791 | } |
1792 | } |
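
The bundle-merging step above is worth isolating; a sketch of just the concatenation, with illustrative names:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/InstrTypes.h"
#include <vector>

// Prepend the parent call site's deopt continuation to the inlined
// call's own inputs, producing the merged "deopt" operand bundle.
static llvm::OperandBundleDef
mergeDeoptBundles(llvm::ArrayRef<llvm::Value *> ParentInputs,
                  llvm::ArrayRef<llvm::Value *> ChildInputs) {
  std::vector<llvm::Value *> Merged;
  Merged.reserve(ParentInputs.size() + ChildInputs.size());
  Merged.insert(Merged.end(), ParentInputs.begin(), ParentInputs.end());
  Merged.insert(Merged.end(), ChildInputs.begin(), ChildInputs.end());
  return llvm::OperandBundleDef("deopt", std::move(Merged));
}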
1793 | |
1794 | // Update the callgraph if requested. |
1795 | if (IFI.CG) |
1796 | UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); |
1797 | |
1798 | // For 'nodebug' functions, the associated DISubprogram is always null. |
1799 | // Conservatively avoid propagating the callsite debug location to |
1800 | // instructions inlined from a function whose DISubprogram is not null. |
1801 | fixupLineNumbers(Caller, FirstNewBlock, TheCall, |
1802 | CalledFunc->getSubprogram() != nullptr); |
1803 | |
1804 | // Clone existing noalias metadata if necessary. |
1805 | CloneAliasScopeMetadata(CS, VMap); |
1806 | |
1807 | // Add noalias metadata if necessary. |
1808 | AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); |
1809 | |
1810 | // Propagate llvm.mem.parallel_loop_access if necessary. |
1811 | PropagateParallelLoopAccessMetadata(CS, VMap); |
1812 | |
1813 | // Register any cloned assumptions. |
1814 | if (IFI.GetAssumptionCache) |
1815 | for (BasicBlock &NewBlock : |
1816 | make_range(FirstNewBlock->getIterator(), Caller->end())) |
1817 | for (Instruction &I : NewBlock) { |
1818 | if (auto *II = dyn_cast<IntrinsicInst>(&I)) |
1819 | if (II->getIntrinsicID() == Intrinsic::assume) |
1820 | (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II); |
1821 | } |
1822 | } |
1823 | |
1824 | // If there are any alloca instructions in the block that used to be the entry |
1825 | // block for the callee, move them to the entry block of the caller. First |
1826 | // calculate which instruction they should be inserted before. We insert the |
1827 | // instructions at the end of the current alloca list. |
1828 | { |
1829 | BasicBlock::iterator InsertPoint = Caller->begin()->begin(); |
1830 | for (BasicBlock::iterator I = FirstNewBlock->begin(), |
1831 | E = FirstNewBlock->end(); I != E; ) { |
1832 | AllocaInst *AI = dyn_cast<AllocaInst>(I++); |
1833 | if (!AI) continue; |
1834 | |
1835 | // If the alloca is now dead, remove it. This often occurs due to code |
1836 | // specialization. |
1837 | if (AI->use_empty()) { |
1838 | AI->eraseFromParent(); |
1839 | continue; |
1840 | } |
1841 | |
1842 | if (!allocaWouldBeStaticInEntry(AI)) |
1843 | continue; |
1844 | |
1845 | // Keep track of the static allocas that we inline into the caller. |
1846 | IFI.StaticAllocas.push_back(AI); |
1847 | |
1848 | // Scan for the block of allocas that we can move over, and move them |
1849 | // all at once. |
1850 | while (isa<AllocaInst>(I) && |
1851 | allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { |
1852 | IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); |
1853 | ++I; |
1854 | } |
1855 | |
1856 | // Transfer all of the allocas over in a block. Using splice means |
1857 | // that the instructions aren't removed from the symbol table, then |
1858 | // reinserted. |
1859 | Caller->getEntryBlock().getInstList().splice( |
1860 | InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); |
1861 | } |
1862 | // Move any dbg.declares describing the allocas into the entry basic block. |
1863 | DIBuilder DIB(*Caller->getParent()); |
1864 | for (auto &AI : IFI.StaticAllocas) |
1865 | replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0); |
1866 | } |
1867 | |
1868 | SmallVector<Value*,4> VarArgsToForward; |
1869 | SmallVector<AttributeSet, 4> VarArgsAttrs; |
1870 | for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); |
1871 | i < CS.getNumArgOperands(); i++) { |
1872 | VarArgsToForward.push_back(CS.getArgOperand(i)); |
1873 | VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); |
1874 | } |
1875 | |
1876 | bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; |
1877 | if (InlinedFunctionInfo.ContainsCalls) { |
1878 | CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; |
1879 | if (CallInst *CI = dyn_cast<CallInst>(TheCall)) |
1880 | CallSiteTailKind = CI->getTailCallKind(); |
1881 | |
1882 | // For inlining purposes, the "notail" marker is the same as no marker. |
1883 | if (CallSiteTailKind == CallInst::TCK_NoTail) |
1884 | CallSiteTailKind = CallInst::TCK_None; |
1885 | |
1886 | for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; |
1887 | ++BB) { |
1888 | for (auto II = BB->begin(); II != BB->end();) { |
1889 | Instruction &I = *II++; |
1890 | CallInst *CI = dyn_cast<CallInst>(&I); |
1891 | if (!CI) |
1892 | continue; |
1893 | |
1894 | // Forward varargs from inlined call site to calls to the |
1895 | // ForwardVarArgsTo function, if requested, and to musttail calls. |
1896 | if (!VarArgsToForward.empty() && |
1897 | ((ForwardVarArgsTo && |
1898 | CI->getCalledFunction() == ForwardVarArgsTo) || |
1899 | CI->isMustTailCall())) { |
1900 | // Collect attributes for non-vararg parameters. |
1901 | AttributeList Attrs = CI->getAttributes(); |
1902 | SmallVector<AttributeSet, 8> ArgAttrs; |
1903 | if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { |
1904 | for (unsigned ArgNo = 0; |
1905 | ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) |
1906 | ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); |
1907 | } |
1908 | |
1909 | // Add VarArg attributes. |
1910 | ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); |
1911 | Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(), |
1912 | Attrs.getRetAttributes(), ArgAttrs); |
1913 | // Add VarArgs to existing parameters. |
1914 | SmallVector<Value *, 6> Params(CI->arg_operands()); |
1915 | Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); |
1916 | CallInst *NewCI = CallInst::Create( |
1917 | CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI); |
1918 | NewCI->setDebugLoc(CI->getDebugLoc()); |
1919 | NewCI->setAttributes(Attrs); |
1920 | NewCI->setCallingConv(CI->getCallingConv()); |
1921 | CI->replaceAllUsesWith(NewCI); |
1922 | CI->eraseFromParent(); |
1923 | CI = NewCI; |
1924 | } |
1925 | |
1926 | if (Function *F = CI->getCalledFunction()) |
1927 | InlinedDeoptimizeCalls |= |
1928 | F->getIntrinsicID() == Intrinsic::experimental_deoptimize; |
1929 | |
1930 | // We need to reduce the strength of any inlined tail calls. For |
1931 | // musttail, we have to avoid introducing potential unbounded stack |
1932 | // growth. For example, if functions 'f' and 'g' are mutually recursive |
1933 | // with musttail, we can inline 'g' into 'f' so long as we preserve |
1934 | // musttail on the cloned call to 'f'. If either the inlined call site |
1935 | // or the cloned call site is *not* musttail, the program already has |
1936 | // one frame of stack growth, so it's safe to remove musttail. Here is |
1937 | // a table of example transformations: |
1938 | // |
1939 | // f -> musttail g -> musttail f ==> f -> musttail f |
1940 | // f -> musttail g -> tail f ==> f -> tail f |
1941 | // f -> g -> musttail f ==> f -> f |
1942 | // f -> g -> tail f ==> f -> f |
1943 | // |
1944 | // Inlined notail calls should remain notail calls. |
1945 | CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); |
1946 | if (ChildTCK != CallInst::TCK_NoTail) |
1947 | ChildTCK = std::min(CallSiteTailKind, ChildTCK); |
1948 | CI->setTailCallKind(ChildTCK); |
1949 | InlinedMustTailCalls |= CI->isMustTailCall(); |
1950 | |
1951 | // Calls inlined through a 'nounwind' call site should be marked |
1952 | // 'nounwind'. |
1953 | if (MarkNoUnwind) |
1954 | CI->setDoesNotThrow(); |
1955 | } |
1956 | } |
1957 | } |
1958 | |
1959 | // Leave lifetime markers for the static alloca's, scoping them to the |
1960 | // function we just inlined. |
1961 | if (InsertLifetime && !IFI.StaticAllocas.empty()) { |
1962 | IRBuilder<> builder(&FirstNewBlock->front()); |
1963 | for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { |
1964 | AllocaInst *AI = IFI.StaticAllocas[ai]; |
1965 | // Don't mark swifterror allocas. They can't have bitcast uses. |
1966 | if (AI->isSwiftError()) |
1967 | continue; |
1968 | |
1969 | // If the alloca is already scoped to something smaller than the whole |
1970 | // function then there's no need to add redundant, less accurate markers. |
1971 | if (hasLifetimeMarkers(AI)) |
1972 | continue; |
1973 | |
1974 | // Try to determine the size of the allocation. |
1975 | ConstantInt *AllocaSize = nullptr; |
1976 | if (ConstantInt *AIArraySize = |
1977 | dyn_cast<ConstantInt>(AI->getArraySize())) { |
1978 | auto &DL = Caller->getParent()->getDataLayout(); |
1979 | Type *AllocaType = AI->getAllocatedType(); |
1980 | uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); |
1981 | uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); |
1982 | |
1983 | // Don't add markers for zero-sized allocas. |
1984 | if (AllocaArraySize == 0) |
1985 | continue; |
1986 | |
1987 | // Check that array size doesn't saturate uint64_t and doesn't |
1988 | // overflow when it's multiplied by type size. |
1989 | if (AllocaArraySize != std::numeric_limits<uint64_t>::max() && |
1990 | std::numeric_limits<uint64_t>::max() / AllocaArraySize >= |
1991 | AllocaTypeSize) { |
1992 | AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), |
1993 | AllocaArraySize * AllocaTypeSize); |
1994 | } |
1995 | } |
1996 | |
1997 | builder.CreateLifetimeStart(AI, AllocaSize); |
1998 | for (ReturnInst *RI : Returns) { |
1999 | // Don't insert llvm.lifetime.end calls between a musttail or deoptimize |
2000 | // call and a return. The return kills all local allocas. |
2001 | if (InlinedMustTailCalls && |
2002 | RI->getParent()->getTerminatingMustTailCall()) |
2003 | continue; |
2004 | if (InlinedDeoptimizeCalls && |
2005 | RI->getParent()->getTerminatingDeoptimizeCall()) |
2006 | continue; |
2007 | IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); |
2008 | } |
2009 | } |
2010 | } |
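
For reference, the lifetime bracketing above in isolation; a sketch with illustrative names, where Size may be null when the allocation size is unknown and AI is assumed not to be the last instruction in its block:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Bracket an alloca's live range: lifetime.start right after the alloca,
// lifetime.end before the given exit (e.g. return) instruction.
static void bracketWithLifetime(llvm::AllocaInst *AI, llvm::ConstantInt *Size,
                                llvm::Instruction *Exit) {
  llvm::IRBuilder<>(AI->getNextNode()).CreateLifetimeStart(AI, Size);
  llvm::IRBuilder<>(Exit).CreateLifetimeEnd(AI, Size);
}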
2011 | |
2012 | // If the inlined code contained dynamic alloca instructions, wrap the inlined |
2013 | // code with llvm.stacksave/llvm.stackrestore intrinsics. |
2014 | if (InlinedFunctionInfo.ContainsDynamicAllocas) { |
2015 | Module *M = Caller->getParent(); |
2016 | // Get the two intrinsics we care about. |
2017 | Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); |
2018 | Function *StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);
2019 | |
2020 | // Insert the llvm.stacksave. |
2021 | CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) |
2022 | .CreateCall(StackSave, {}, "savedstack"); |
2023 | |
2024 | // Insert a call to llvm.stackrestore before any return instructions in the |
2025 | // inlined function. |
2026 | for (ReturnInst *RI : Returns) { |
2027 | // Don't insert llvm.stackrestore calls between a musttail or deoptimize |
2028 | // call and a return. The return will restore the stack pointer. |
2029 | if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) |
2030 | continue; |
2031 | if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) |
2032 | continue; |
2033 | IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); |
2034 | } |
2035 | } |
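
And the save/restore pairing reduced to its essentials; a sketch where Entry and Ret are illustrative insertion points, not names from this file:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Save the stack pointer at the top of Entry and restore it before Ret,
// so dynamic allocas between the two do not leak stack space.
static void wrapWithStackSaveRestore(llvm::BasicBlock *Entry,
                                     llvm::ReturnInst *Ret) {
  llvm::Module *M = Entry->getModule();
  llvm::Function *Save =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::stacksave);
  llvm::Function *Restore =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::stackrestore);
  llvm::CallInst *SP = llvm::IRBuilder<>(Entry, Entry->begin())
                           .CreateCall(Save, {}, "savedstack");
  llvm::IRBuilder<>(Ret).CreateCall(Restore, {SP});
}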
2036 | |
2037 | // If we are inlining for an invoke instruction, we must make sure to rewrite |
2038 | // any call instructions into invoke instructions. This is sensitive to which |
2039 | // funclet pads were top-level in the inlinee, so must be done before |
2040 | // rewriting the "parent pad" links. |
2041 | if (auto *II = dyn_cast<InvokeInst>(TheCall)) { |
2042 | BasicBlock *UnwindDest = II->getUnwindDest(); |
2043 | Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); |
2044 | if (isa<LandingPadInst>(FirstNonPHI)) { |
2045 | HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); |
2046 | } else { |
2047 | HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); |
2048 | } |
2049 | } |
2050 | |
2051 | // Update the lexical scopes of the new funclets and callsites. |
2052 | // Anything that had 'none' as its parent is now nested inside the callsite's |
2053 | // EHPad. |
2054 | |
2055 | if (CallSiteEHPad) { |
2056 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
2057 | E = Caller->end(); |
2058 | BB != E; ++BB) { |
2059 | // Add bundle operands to any top-level call sites. |
2060 | SmallVector<OperandBundleDef, 1> OpBundles; |
2061 | for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { |
2062 | Instruction *I = &*BBI++; |
2063 | CallSite CS(I); |
2064 | if (!CS) |
2065 | continue; |
2066 | |
2067 | // Skip call sites which are nounwind intrinsics. |
2068 | auto *CalledFn = |
2069 | dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); |
2070 | if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) |
2071 | continue; |
2072 | |
2073 | // Skip call sites which already have a "funclet" bundle. |
2074 | if (CS.getOperandBundle(LLVMContext::OB_funclet)) |
2075 | continue; |
2076 | |
2077 | CS.getOperandBundlesAsDefs(OpBundles); |
2078 | OpBundles.emplace_back("funclet", CallSiteEHPad); |
2079 | |
2080 | Instruction *NewInst; |
2081 | if (CS.isCall()) |
2082 | NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); |
2083 | else if (CS.isCallBr()) |
2084 | NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I); |
2085 | else |
2086 | NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); |
2087 | NewInst->takeName(I); |
2088 | I->replaceAllUsesWith(NewInst); |
2089 | I->eraseFromParent(); |
2090 | |
2091 | OpBundles.clear(); |
2092 | } |
2093 | |
2094 | // It is problematic if the inlinee has a cleanupret which unwinds to
2095 | // caller while the call site it was inlined at sits in an EH pad that
2096 | // unwinds locally instead. Such an edge must be dynamically unreachable,
2097 | // so we replace the cleanupret with unreachable.
2098 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) |
2099 | if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) |
2100 | changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false); |
2101 | |
2102 | Instruction *I = BB->getFirstNonPHI(); |
2103 | if (!I->isEHPad()) |
2104 | continue; |
2105 | |
2106 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { |
2107 | if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) |
2108 | CatchSwitch->setParentPad(CallSiteEHPad); |
2109 | } else { |
2110 | auto *FPI = cast<FuncletPadInst>(I); |
2111 | if (isa<ConstantTokenNone>(FPI->getParentPad())) |
2112 | FPI->setParentPad(CallSiteEHPad); |
2113 | } |
2114 | } |
2115 | } |
2116 | |
2117 | if (InlinedDeoptimizeCalls) { |
2118 | // We need to at least remove the deoptimizing returns from the Return set, |
2119 | // so that the control flow from those returns does not get merged into the |
2120 | // caller (but terminate it instead). If the caller's return type does not |
2121 | // match the callee's return type, we also need to change the return type of |
2122 | // the intrinsic. |
2123 | if (Caller->getReturnType() == TheCall->getType()) { |
2124 | auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { |
2125 | return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; |
2126 | }); |
2127 | Returns.erase(NewEnd, Returns.end()); |
2128 | } else { |
2129 | SmallVector<ReturnInst *, 8> NormalReturns; |
2130 | Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( |
2131 | Caller->getParent(), Intrinsic::experimental_deoptimize, |
2132 | {Caller->getReturnType()}); |
2133 | |
2134 | for (ReturnInst *RI : Returns) { |
2135 | CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); |
2136 | if (!DeoptCall) { |
2137 | NormalReturns.push_back(RI); |
2138 | continue; |
2139 | } |
2140 | |
2141 | // The calling convention on the deoptimize call itself may be bogus, |
2142 | // since the code we're inlining may have undefined behavior (and may |
2143 | // never actually execute at runtime); but all |
2144 | // @llvm.experimental.deoptimize declarations have to have the same |
2145 | // calling convention in a well-formed module. |
2146 | auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); |
2147 | NewDeoptIntrinsic->setCallingConv(CallingConv); |
2148 | auto *CurBB = RI->getParent(); |
2149 | RI->eraseFromParent(); |
2150 | |
2151 | SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(), |
2152 | DeoptCall->arg_end()); |
2153 | |
2154 | SmallVector<OperandBundleDef, 1> OpBundles; |
2155 | DeoptCall->getOperandBundlesAsDefs(OpBundles); |
2156 | DeoptCall->eraseFromParent(); |
2157 | assert(!OpBundles.empty() &&
2158 |        "Expected at least the deopt operand bundle");
2159 | |
2160 | IRBuilder<> Builder(CurBB); |
2161 | CallInst *NewDeoptCall = |
2162 | Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); |
2163 | NewDeoptCall->setCallingConv(CallingConv); |
2164 | if (NewDeoptCall->getType()->isVoidTy()) |
2165 | Builder.CreateRetVoid(); |
2166 | else |
2167 | Builder.CreateRet(NewDeoptCall); |
2168 | } |
2169 | |
2170 | // Leave behind the normal returns so we can merge control flow. |
2171 | std::swap(Returns, NormalReturns); |
2172 | } |
2173 | } |
2174 | |
2175 | // Handle any inlined musttail call sites. In order for a new call site to be |
2176 | // musttail, the source of the clone and the inlined call site must have been |
2177 | // musttail. Therefore it's safe to return without merging control into the |
2178 | // phi below. |
2179 | if (InlinedMustTailCalls) { |
2180 | // Check if we need to bitcast the result of any musttail calls. |
2181 | Type *NewRetTy = Caller->getReturnType(); |
2182 | bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; |
2183 | |
2184 | // Handle the returns preceded by musttail calls separately. |
2185 | SmallVector<ReturnInst *, 8> NormalReturns; |
2186 | for (ReturnInst *RI : Returns) { |
2187 | CallInst *ReturnedMustTail = |
2188 | RI->getParent()->getTerminatingMustTailCall(); |
2189 | if (!ReturnedMustTail) { |
2190 | NormalReturns.push_back(RI); |
2191 | continue; |
2192 | } |
2193 | if (!NeedBitCast) |
2194 | continue; |
2195 | |
2196 | // Delete the old return and any preceding bitcast. |
2197 | BasicBlock *CurBB = RI->getParent(); |
2198 | auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); |
2199 | RI->eraseFromParent(); |
2200 | if (OldCast) |
2201 | OldCast->eraseFromParent(); |
2202 | |
2203 | // Insert a new bitcast and return with the right type. |
2204 | IRBuilder<> Builder(CurBB); |
2205 | Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); |
2206 | } |
2207 | |
2208 | // Leave behind the normal returns so we can merge control flow. |
2209 | std::swap(Returns, NormalReturns); |
2210 | } |
2211 | |
2212 | // Now that all of the transforms on the inlined code have taken place but |
2213 | // before we splice the inlined code into the CFG and lose track of which |
2214 | // blocks were actually inlined, collect the call sites. We only do this if |
2215 | // call graph updates weren't requested, as those provide value handle based |
2216 | // tracking of inlined call sites instead. |
2217 | if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { |
2218 | // Otherwise just collect the raw call sites that were inlined. |
2219 | for (BasicBlock &NewBB : |
2220 | make_range(FirstNewBlock->getIterator(), Caller->end())) |
2221 | for (Instruction &I : NewBB) |
2222 | if (auto CS = CallSite(&I)) |
2223 | IFI.InlinedCallSites.push_back(CS); |
2224 | } |
2225 | |
2226 | // If we cloned in _exactly one_ basic block, and if that block ends in a |
2227 | // return instruction, we splice the body of the inlined callee directly into |
2228 | // the calling basic block. |
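     | 
     | // Sketch of this fast path (hypothetical IR; @callee and the value names
     | // are invented): a caller block containing "%r = call i32 @callee()" whose
     | // callee cloned to a single block ending in "ret i32 %v" becomes the caller
     | // block with the cloned instructions spliced in ahead of the call site and
     | // %r's uses rewritten to %v -- no new blocks or branches are created.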
2229 | if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { |
2230 | // Move all of the instructions right before the call. |
2231 | OrigBB->getInstList().splice(TheCall->getIterator(), |
2232 | FirstNewBlock->getInstList(), |
2233 | FirstNewBlock->begin(), FirstNewBlock->end()); |
2234 | // Remove the cloned basic block. |
2235 | Caller->getBasicBlockList().pop_back(); |
2236 | |
2237 | // If the call site was an invoke instruction, add a branch to the normal |
2238 | // destination. |
2239 | if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { |
2240 | BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); |
2241 | NewBr->setDebugLoc(Returns[0]->getDebugLoc()); |
2242 | } |
2243 | |
2244 | // If the return instruction returned a value, replace uses of the call with |
2245 | // uses of the returned value. |
2246 | if (!TheCall->use_empty()) { |
2247 | ReturnInst *R = Returns[0]; |
2248 | if (TheCall == R->getReturnValue()) |
2249 | TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); |
2250 | else |
2251 | TheCall->replaceAllUsesWith(R->getReturnValue()); |
2252 | } |
2253 | // Since we are now done with the Call/Invoke, we can delete it. |
2254 | TheCall->eraseFromParent(); |
2255 | |
2256 | // Since we are now done with the return instruction, delete it also. |
2257 | Returns[0]->eraseFromParent(); |
2258 | |
2259 | // We are now done with the inlining. |
2260 | return true; |
2261 | } |
2262 | |
2263 | // Otherwise, we have the normal case, of more than one block to inline or |
2264 | // multiple return sites. |
2265 | |
2266 | // We want to clone the entire callee function into the hole between the |
2267 | // "starter" and "ender" blocks. How we accomplish this depends on whether |
2268 | // this is an invoke instruction or a call instruction. |
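     | // The end state, roughly (a sketch; block names are illustrative only):
     | //   OrigBB:               ... ; br to FirstNewBlock (installed below)
     | //   FirstNewBlock, ...:   the cloned callee blocks
     | //   AfterCallBB ("<callee>.exit"): the code that followed the call site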
2269 | BasicBlock *AfterCallBB; |
2270 | BranchInst *CreatedBranchToNormalDest = nullptr; |
2271 | if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { |
2272 | |
2273 | // Add an unconditional branch to make this look like the CallInst case... |
2274 | CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); |
2275 | |
2276 | // Split the basic block. This guarantees that no PHI nodes will have to be |
2277 | // updated due to new incoming edges, and makes the invoke case more
2278 | // symmetric to the call case. |
2279 | AfterCallBB = |
2280 | OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), |
2281 | CalledFunc->getName() + ".exit"); |
2282 | |
2283 | } else { // It's a call |
2284 | // If this is a call instruction, we need to split the basic block that |
2285 | // the call lives in. |
2286 | // |
2287 | AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), |
2288 | CalledFunc->getName() + ".exit"); |
2289 | } |
2290 | |
2291 | if (IFI.CallerBFI) { |
2292 | // Copy original BB's block frequency to AfterCallBB |
2293 | IFI.CallerBFI->setBlockFreq( |
2294 | AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); |
2295 | } |
2296 | |
2297 | // Change the branch that used to go to AfterCallBB to branch to the first |
2298 | // basic block of the inlined function. |
2299 | // |
2300 | Instruction *Br = OrigBB->getTerminator(); |
2301 | assert(Br && Br->getOpcode() == Instruction::Br &&
2302 | "splitBasicBlock broken!");
2303 | Br->setOperand(0, &*FirstNewBlock); |
2304 | |
2305 | // Now that the function is correct, make it a little bit nicer. In |
2306 | // particular, move the basic blocks inserted from the end of the function |
2307 | // into the space made by splitting the source basic block. |
2308 | Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), |
2309 | Caller->getBasicBlockList(), FirstNewBlock, |
2310 | Caller->end()); |
2311 | |
2312 | // Handle all of the return instructions that we just cloned in, and eliminate |
2313 | // any users of the original call/invoke instruction. |
2314 | Type *RTy = CalledFunc->getReturnType(); |
2315 | |
2316 | PHINode *PHI = nullptr; |
2317 | if (Returns.size() > 1) { |
2318 | // The PHI node should go at the front of the new basic block to merge all |
2319 | // possible incoming values. |
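     | // For example (hypothetical IR), cloned returns "ret i32 %a" in %bb1 and
     | // "ret i32 %b" in %bb2 merge as
     | //   %phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
     | // at the front of AfterCallBB, and former users of the call read %phi.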
2320 | if (!TheCall->use_empty()) { |
2321 | PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), |
2322 | &AfterCallBB->front()); |
2323 | // Anything that used the result of the function call should now use the |
2324 | // PHI node as their operand. |
2325 | TheCall->replaceAllUsesWith(PHI); |
2326 | } |
2327 | |
2328 | // Loop over all of the return instructions adding entries to the PHI node |
2329 | // as appropriate. |
2330 | if (PHI) { |
2331 | for (unsigned i = 0, e = Returns.size(); i != e; ++i) { |
2332 | ReturnInst *RI = Returns[i]; |
2333 | assert(RI->getReturnValue()->getType() == PHI->getType() &&
2334 | "Ret value not consistent in function!");
2335 | PHI->addIncoming(RI->getReturnValue(), RI->getParent()); |
2336 | } |
2337 | } |
2338 | |
2339 | // Add a branch to the merge point and remove the return instructions.
2340 | DebugLoc Loc; |
2341 | for (unsigned i = 0, e = Returns.size(); i != e; ++i) { |
2342 | ReturnInst *RI = Returns[i]; |
2343 | BranchInst* BI = BranchInst::Create(AfterCallBB, RI); |
2344 | Loc = RI->getDebugLoc(); |
2345 | BI->setDebugLoc(Loc); |
2346 | RI->eraseFromParent(); |
2347 | } |
2348 | // We need to set the debug location to *somewhere* inside the |
2349 | // inlined function. The line number may be nonsensical, but the |
2350 | // instruction will at least be associated with the right |
2351 | // function. |
2352 | if (CreatedBranchToNormalDest) |
2353 | CreatedBranchToNormalDest->setDebugLoc(Loc); |
2354 | } else if (!Returns.empty()) { |
2355 | // Otherwise, if there is exactly one return value, just replace anything |
2356 | // using the return value of the call with the computed value. |
2357 | if (!TheCall->use_empty()) { |
2358 | if (TheCall == Returns[0]->getReturnValue()) |
2359 | TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); |
2360 | else |
2361 | TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); |
2362 | } |
2363 | |
2364 | // Update PHI nodes that use the ReturnBB to use the AfterCallBB. |
2365 | BasicBlock *ReturnBB = Returns[0]->getParent(); |
2366 | ReturnBB->replaceAllUsesWith(AfterCallBB); |
2367 | |
2368 | // Splice the code from the return block into the block that it will return |
2369 | // to, which contains the code that was after the call. |
2370 | AfterCallBB->getInstList().splice(AfterCallBB->begin(), |
2371 | ReturnBB->getInstList()); |
2372 | |
2373 | if (CreatedBranchToNormalDest) |
2374 | CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); |
2375 | |
2376 | // Delete the return instruction, then erase the now-empty ReturnBB.
2377 | Returns[0]->eraseFromParent(); |
2378 | ReturnBB->eraseFromParent(); |
2379 | } else if (!TheCall->use_empty()) { |
2380 | // No returns, but something is using the return value of the call. Just |
2381 | // nuke the result. |
2382 | TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); |
2383 | } |
2384 | |
2385 | // Since we are now done with the Call/Invoke, we can delete it. |
2386 | TheCall->eraseFromParent(); |
2387 | |
2388 | // If we inlined any musttail calls and the original return is now |
2389 | // unreachable, delete it. It can only contain a bitcast and ret. |
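     | // (Returns preceded by musttail calls were filtered out of the branch
     | // rewriting above, so no edge into AfterCallBB was created for them; if
     | // every return was of that form, AfterCallBB now has no predecessors.)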
2390 | if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) |
2391 | AfterCallBB->eraseFromParent(); |
2392 | |
2393 | // We should always be able to fold the entry block of the function into the |
2394 | // single predecessor of the block... |
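     | // Sketch (hypothetical blocks): "OrigBB: ...; br %CalleeEntry" plus
     | // "CalleeEntry: <cloned entry code>" collapses into a single OrigBB, with
     | // any PHIs naming CalleeEntry retargeted to OrigBB first.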
2395 | assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
2396 | BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); |
2397 | |
2398 | // Splice the code entry block into calling block, right before the |
2399 | // unconditional branch. |
2400 | CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes |
2401 | OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); |
2402 | |
2403 | // Remove the unconditional branch. |
2404 | OrigBB->getInstList().erase(Br); |
2405 | |
2406 | // Now we can remove the CalleeEntry block, which is now empty. |
2407 | Caller->getBasicBlockList().erase(CalleeEntry); |
2408 | |
2409 | // If we inserted a phi node, check to see if it has a single value (e.g. all |
2410 | // the entries are the same or undef). If so, remove the PHI so it doesn't |
2411 | // block other optimizations. |
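     | // E.g. (hypothetical IR) "%phi = phi i32 [ %v, %bb1 ], [ %v, %bb2 ]"
     | // folds to plain %v via SimplifyInstruction, and the PHI is erased.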
2412 | if (PHI) { |
2413 | AssumptionCache *AC = |
2414 | IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; |
2415 | auto &DL = Caller->getParent()->getDataLayout(); |
2416 | if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { |
2417 | PHI->replaceAllUsesWith(V); |
2418 | PHI->eraseFromParent(); |
2419 | } |
2420 | } |
2421 | |
2422 | return true; |
2423 | } |