// PartialInlining.cpp — source listing extracted from the LLVM (23.0.0git)
// documentation browser.
1//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs partial inlining, typically by inlining an if statement
10// that surrounds the body of the function.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/CFG.h"
32#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Operator.h"
44#include "llvm/IR/User.h"
50#include "llvm/Transforms/IPO.h"
54#include <algorithm>
55#include <cassert>
56#include <cstdint>
57#include <memory>
58#include <tuple>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "partial-inlining"
64
65STATISTIC(NumPartialInlined,
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
70STATISTIC(NumColdRegionsFound,
71 "Number of cold single entry/exit regions found.");
72STATISTIC(NumColdRegionsOutlined,
73 "Number of cold single entry/exit regions outlined.");
74
75// Command line option to disable partial-inlining. The default is false:
76static cl::opt<bool>
77 DisablePartialInlining("disable-partial-inlining", cl::init(false),
78 cl::Hidden, cl::desc("Disable partial inlining"));
79// Command line option to disable multi-region partial-inlining. The default is
80// false:
82 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
83 cl::desc("Disable multi-region partial inlining"));
84
85// Command line option to force outlining in regions with live exit variables.
86// The default is false:
87static cl::opt<bool>
88 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
89 cl::desc("Force outline regions with live exits"));
90
91// Command line option to enable marking outline functions with Cold Calling
92// Convention. The default is false:
93static cl::opt<bool>
94 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
95 cl::desc("Mark outline function calls with ColdCC"));
96
97// This is an option used by testing:
98static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
99
101 cl::desc("Skip Cost Analysis"));
102// Used to determine if a cold region is worth outlining based on
103// its inlining cost compared to the original function. Default is set at 10%.
104// ie. if the cold region reduces the inlining cost of the original function by
105// at least 10%.
107 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
108 cl::desc("Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
110// Used to tune the minimum number of execution counts needed in the predecessor
111// block to the cold edge. ie. confidence interval.
113 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114 cl::desc("Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
116// Used to determine when an edge is considered cold. Default is set to 10%. ie.
117// if the branch probability is 10% or less, then it is deemed as 'cold'.
119 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
120 cl::desc("Minimum BranchProbability to consider a region cold."));
121
123 "max-num-inline-blocks", cl::init(5), cl::Hidden,
124 cl::desc("Max number of blocks to be partially inlined"));
125
126// Command line option to set the maximum number of partial inlining allowed
127// for the module. The default value of -1 means no limit.
129 "max-partial-inlining", cl::init(-1), cl::Hidden,
130 cl::desc("Max number of partial inlining. The default is unlimited"));
131
132// Used only when PGO or user annotated branch data is absent. It is
133// the least value that is used to weigh the outline region. If BFI
134// produces larger value, the BFI value will be used.
135static cl::opt<int>
136 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
138 cl::desc("Relative frequency of outline region to "
139 "the entry block"));
140
142 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
143 cl::desc("A debug option to add additional penalty to the computed one."));
144
145namespace {
146
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() = default;
149
150 // Returns the number of blocks to be inlined including all blocks
151 // in Entries and one return block.
152 unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
153
154 // A set of blocks including the function entry that guard
155 // the region to be outlined.
157
158 // The return block that is not included in the outlined region.
159 BasicBlock *ReturnBlock = nullptr;
160
161 // The dominating block of the region to be outlined.
162 BasicBlock *NonReturnBlock = nullptr;
163
164 // The set of blocks in Entries that are predecessors to ReturnBlock
165 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
166};
167
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() = default;
170
171 // Container for outline regions
172 struct OutlineRegionInfo {
173 OutlineRegionInfo(ArrayRef<BasicBlock *> Region, BasicBlock *EntryBlock,
174 BasicBlock *ExitBlock, BasicBlock *ReturnBlock)
175 : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
176 ReturnBlock(ReturnBlock) {}
177 SmallVector<BasicBlock *, 8> Region;
178 BasicBlock *EntryBlock;
179 BasicBlock *ExitBlock;
180 BasicBlock *ReturnBlock;
181 };
182
184};
185
// Implements the partial-inlining transformation: finds outlining candidates
// in each function, clones and outlines them, and tries to inline the shrunk
// clone into its callers.
struct PartialInlinerImpl {

  PartialInlinerImpl(
      function_ref<AssumptionCache &(Function &)> GetAC,
      function_ref<AssumptionCache *(Function &)> LookupAC,
      function_ref<TargetTransformInfo &(Function &)> GTTI,
      function_ref<const TargetLibraryInfo &(Function &)> GTLI,
      ProfileSummaryInfo &ProfSI,
      function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
      : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
        GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}

  // Entry point: run partial inlining over the module; returns true if the
  // module was changed.
  bool run(Module &M);
  // Main part of the transformation that calls helper functions to find
  // outlining candidates, clone & outline the function, and attempt to
  // partially inline the resulting function. Returns true if
  // inlining was successful, false otherwise. Also returns the outline
  // function (only if we partially inlined early returns) as there is a
  // possibility to further "peel" early return statements that were left in the
  // outline function due to code size.
  std::pair<bool, Function *> unswitchFunction(Function &F);

  // This class speculatively clones the function to be partial inlined.
  // At the end of partial inlining, the remaining callsites to the cloned
  // function that are not partially inlined will be fixed up to reference
  // the original function, and the cloned function will be erased.
  struct FunctionCloner {
    // Two constructors, one for single region outlining, the other for
    // multi-region outlining.
    FunctionCloner(Function *F, FunctionOutliningInfo *OI,
                   OptimizationRemarkEmitter &ORE,
                   function_ref<AssumptionCache *(Function &)> LookupAC,
                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
    FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
                   OptimizationRemarkEmitter &ORE,
                   function_ref<AssumptionCache *(Function &)> LookupAC,
                   function_ref<TargetTransformInfo &(Function &)> GetTTI);

    ~FunctionCloner();

    // Prepare for function outlining: making sure there is only
    // one incoming edge from the extracted/outlined region to
    // the return block.
    void normalizeReturnBlock() const;

    // Do function outlining for cold regions.
    bool doMultiRegionFunctionOutlining();
    // Do function outlining for region after early return block(s).
    // NOTE: For vararg functions that do the vararg handling in the outlined
    //       function, we temporarily generate IR that does not properly
    //       forward varargs to the outlined function. Calling InlineFunction
    //       will update calls to the outlined functions to properly forward
    //       the varargs.
    Function *doSingleRegionFunctionOutlining();

    // The original function and its speculative clone.
    Function *OrigFunc = nullptr;
    Function *ClonedFunc = nullptr;

    typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
    // Keep track of Outlined Functions and the basic block they're called from.
    SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;

    // ClonedFunc is inlined in one of its callers after function
    // outlining.
    bool IsFunctionInlined = false;
    // The cost of the region to be outlined.
    InstructionCost OutlinedRegionCost = 0;
    // ClonedOI is specific to outlining non-early return blocks.
    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    // ClonedOMRI is specific to outlining cold regions.
    std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
    OptimizationRemarkEmitter &ORE;
    function_ref<AssumptionCache *(Function &)> LookupAC;
    function_ref<TargetTransformInfo &(Function &)> GetTTI;
  };

private:
  // Number of partial inlinings performed so far; checked against
  // MaxNumPartialInlining in isLimitReached().
  int NumPartialInlining = 0;
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
  function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
  function_ref<TargetTransformInfo &(Function &)> GetTTI;
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
  ProfileSummaryInfo &PSI;

  // Return the frequency of the OutliningBB relative to F's entry point.
  // The result is no larger than 1 and is represented using BP.
  // (Note that the outlined region's 'head' block can only have incoming
  // edges from the guarding entry blocks).
  BranchProbability
  getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;

  // Return true if the callee of CB should be partially inlined with
  // profit.
  bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
                           BlockFrequency WeightedOutliningRcost,
                           OptimizationRemarkEmitter &ORE) const;

  // Try to inline DuplicateFunction (cloned from F with call to
  // the OutlinedFunction into its callers. Return true
  // if there is any successful inlining.
  bool tryPartialInline(FunctionCloner &Cloner);

  // Compute the mapping from use site of DuplicationFunction to the enclosing
  // BB's profile count.
  void
  computeCallsiteToProfCountMap(Function *DuplicateFunction,
                                DenseMap<User *, uint64_t> &SiteCountMap) const;

  // True once the module-wide cap on partial inlinings has been hit
  // (MaxNumPartialInlining == -1 means unlimited).
  bool isLimitReached() const {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }

  // Cast a user of the (cloned) function to a call/invoke; any other kind of
  // user is a bug.
  static CallBase *getSupportedCallBase(User *U) {
    if (isa<CallInst>(U) || isa<InvokeInst>(U))
      return cast<CallBase>(U);
    llvm_unreachable("All uses must be calls");
    return nullptr;
  }

  // NOTE(review): dereferences user_begin() unconditionally — assumes F has
  // at least one user; callers must guarantee this.
  static CallBase *getOneCallSiteTo(Function &F) {
    User *User = *F.user_begin();
    return getSupportedCallBase(User);
  }

  // Debug location and enclosing block of one (arbitrary) callsite of F,
  // used for remark emission.
  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
    CallBase *CB = getOneCallSiteTo(F);
    DebugLoc DLoc = CB->getDebugLoc();
    BasicBlock *Block = CB->getParent();
    return std::make_tuple(DLoc, Block);
  }

  // Returns the costs associated with function outlining:
  // - The first value is the non-weighted runtime cost for making the call
  //   to the outlined function, including the additional setup cost in the
  //   outlined function itself;
  // - The second value is the estimated size of the new call sequence in
  //   basic block Cloner.OutliningCallBB;
  std::tuple<InstructionCost, InstructionCost>
  computeOutliningCosts(FunctionCloner &Cloner) const;

  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
  // approximate both the size and runtime cost (Note that in the current
  // inline cost analysis, there is no clear distinction there either).
  static InstructionCost computeBBInlineCost(BasicBlock *BB,
                                             TargetTransformInfo *TTI);

  // Detect the single-region (early-return) outlining shape; nullptr if none.
  std::unique_ptr<FunctionOutliningInfo>
  computeOutliningInfo(Function &F) const;

  // Detect cold single entry/exit regions for multi-region outlining;
  // nullptr if none (or no instrumentation profile is available).
  std::unique_ptr<FunctionOutliningMultiRegionInfo>
  computeOutliningColdRegionsInfo(Function &F,
                                  OptimizationRemarkEmitter &ORE) const;
};
342
343} // end anonymous namespace
344
345std::unique_ptr<FunctionOutliningMultiRegionInfo>
346PartialInlinerImpl::computeOutliningColdRegionsInfo(
347 Function &F, OptimizationRemarkEmitter &ORE) const {
348 BasicBlock *EntryBlock = &F.front();
349
350 DominatorTree DT(F);
351 LoopInfo LI(DT);
352 BranchProbabilityInfo BPI(F, LI);
353 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
354 BlockFrequencyInfo *BFI;
355 if (!GetBFI) {
356 ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
357 BFI = ScopedBFI.get();
358 } else
359 BFI = &(GetBFI(F));
360
361 // Return if we don't have profiling information.
362 if (!PSI.hasInstrumentationProfile())
363 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
364
365 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
366 std::make_unique<FunctionOutliningMultiRegionInfo>();
367
368 auto IsSingleExit =
369 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
370 BasicBlock *ExitBlock = nullptr;
371 for (auto *Block : BlockList) {
372 for (BasicBlock *Succ : successors(Block)) {
373 if (!is_contained(BlockList, Succ)) {
374 if (ExitBlock) {
375 ORE.emit([&]() {
376 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
377 &Succ->front())
378 << "Region dominated by "
379 << ore::NV("Block", BlockList.front()->getName())
380 << " has more than one region exit edge.";
381 });
382 return nullptr;
383 }
384
385 ExitBlock = Block;
386 }
387 }
388 }
389 return ExitBlock;
390 };
391
392 auto BBProfileCount = [BFI](BasicBlock *BB) {
393 return BFI->getBlockProfileCount(BB).value_or(0);
394 };
395
396 // Use the same computeBBInlineCost function to compute the cost savings of
397 // the outlining the candidate region.
398 TargetTransformInfo *FTTI = &GetTTI(F);
399 InstructionCost OverallFunctionCost = 0;
400 for (auto &BB : F)
401 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
402
403 LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
404 << "\n";);
405
406 InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
407 [&](auto Cost) { return Cost * MinRegionSizeRatio; });
408
409 BranchProbability MinBranchProbability(
410 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
412 bool ColdCandidateFound = false;
413 BasicBlock *CurrEntry = EntryBlock;
414 std::vector<BasicBlock *> DFS;
415 SmallPtrSet<BasicBlock *, 8> VisitedSet;
416 DFS.push_back(CurrEntry);
417 VisitedSet.insert(CurrEntry);
418
419 // Use Depth First Search on the basic blocks to find CFG edges that are
420 // considered cold.
421 // Cold regions considered must also have its inline cost compared to the
422 // overall inline cost of the original function. The region is outlined only
423 // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
424 // more.
425 while (!DFS.empty()) {
426 auto *ThisBB = DFS.back();
427 DFS.pop_back();
428 // Only consider regions with predecessor blocks that are considered
429 // not-cold (default: part of the top 99.99% of all block counters)
430 // AND greater than our minimum block execution count (default: 100).
431 if (PSI.isColdBlock(ThisBB, BFI) ||
432 BBProfileCount(ThisBB) < MinBlockCounterExecution)
433 continue;
434 for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
435 if (!VisitedSet.insert(*SI).second)
436 continue;
437 DFS.push_back(*SI);
438 // If branch isn't cold, we skip to the next one.
439 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
440 if (SuccProb > MinBranchProbability)
441 continue;
442
443 LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
444 << SI->getName()
445 << "\nBranch Probability = " << SuccProb << "\n";);
446
447 SmallVector<BasicBlock *, 8> DominateVector;
448 DT.getDescendants(*SI, DominateVector);
449 assert(!DominateVector.empty() &&
450 "SI should be reachable and have at least itself as descendant");
451
452 // We can only outline single entry regions (for now).
453 if (!DominateVector.front()->hasNPredecessors(1)) {
454 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
455 << " doesn't have a single predecessor in the "
456 "dominator tree\n";);
457 continue;
458 }
459
460 BasicBlock *ExitBlock = nullptr;
461 // We can only outline single exit regions (for now).
462 if (!(ExitBlock = IsSingleExit(DominateVector))) {
463 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
464 << " doesn't have a unique successor\n";);
465 continue;
466 }
467
468 InstructionCost OutlineRegionCost = 0;
469 for (auto *BB : DominateVector)
470 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
471
472 LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
473 << "\n";);
474
475 if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
476 ORE.emit([&]() {
477 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
478 &SI->front())
479 << ore::NV("Callee", &F)
480 << " inline cost-savings smaller than "
481 << ore::NV("Cost", MinOutlineRegionCost);
482 });
483
484 LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
485 << MinOutlineRegionCost << "\n";);
486 continue;
487 }
488
489 // For now, ignore blocks that belong to a SISE region that is a
490 // candidate for outlining. In the future, we may want to look
491 // at inner regions because the outer region may have live-exit
492 // variables.
493 VisitedSet.insert_range(DominateVector);
494
495 // ReturnBlock here means the block after the outline call
496 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
497 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
498 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
499 OutliningInfo->ORI.push_back(RegInfo);
500 LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
501 << DominateVector.front()->getName() << "\n";);
502 ColdCandidateFound = true;
503 NumColdRegionsFound++;
504 }
505 }
506
507 if (ColdCandidateFound)
508 return OutliningInfo;
509
510 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
511}
512
// Detect the single-region (early-return) outlining shape rooted at F's
// entry: a chain of two-successor guard blocks whose final guard branches to
// a return block; everything dominated by the non-return successor becomes
// the outlining candidate. Returns nullptr when no candidate is found.
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function &F) const {
  BasicBlock *EntryBlock = &F.front();
  CondBrInst *BR = dyn_cast<CondBrInst>(EntryBlock->getTerminator());
  if (!BR)
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    Instruction *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  // Orders the pair so the return block (if any) comes first; (nullptr,
  // nullptr) when neither successor returns.
  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      std::make_unique<FunctionOutliningInfo>();

  // Walk the chain of guard blocks starting from the entry.
  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (succ_size(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    if (ReturnBlock) {
      // Found the guard that branches to the early return: candidate done.
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    BasicBlock *CommSucc, *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;
  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // There should not be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}
  assert(OutliningInfo->Entries[0] == &F.front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries(llvm::from_range, OutliningInfo->Entries);

  // Returns true if BB has a predecessor which is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto *Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  // Validate the candidate and collect the entry blocks that feed
  // ReturnBlock (needed later to rewrite its PHIs).
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto *Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further growing the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (succ_size(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    // Only peel when Cand also branches to the same early-return block and
    // fully dominates the shrunk region.
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutliningInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}
657
658// Check if there is PGO data or user annotated branch data:
659static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
660 if (F.hasProfileData())
661 return true;
662 // Now check if any of the entry block has MD_prof data:
663 for (auto *E : OI.Entries) {
664 CondBrInst *BR = dyn_cast<CondBrInst>(E->getTerminator());
665 if (BR && hasBranchWeightMD(*BR))
666 return true;
667 }
668 return false;
669}
670
671BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
672 FunctionCloner &Cloner) const {
673 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
674 auto EntryFreq =
675 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
676 auto OutliningCallFreq =
677 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
678 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
679 // we outlined any regions, so we may encounter situations where the
680 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
681 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
682 OutliningCallFreq = EntryFreq;
683
684 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
685 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
686
687 if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
688 return OutlineRegionRelFreq;
689
690 // When profile data is not available, we need to be conservative in
691 // estimating the overall savings. Static branch prediction can usually
692 // guess the branch direction right (taken/non-taken), but the guessed
693 // branch probability is usually not biased enough. In case when the
694 // outlined region is predicted to be likely, its probability needs
695 // to be made higher (more biased) to not under-estimate the cost of
696 // function outlining. On the other hand, if the outlined region
697 // is predicted to be less likely, the predicted probablity is usually
698 // higher than the actual. For instance, the actual probability of the
699 // less likely target is only 5%, but the guessed probablity can be
700 // 40%. In the latter case, there is no need for further adjustment.
701 // FIXME: add an option for this.
702 if (OutlineRegionRelFreq < BranchProbability(45, 100))
703 return OutlineRegionRelFreq;
704
705 OutlineRegionRelFreq = std::max(
706 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
707
708 return OutlineRegionRelFreq;
709}
710
711bool PartialInlinerImpl::shouldPartialInline(
712 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
713 OptimizationRemarkEmitter &ORE) const {
714 using namespace ore;
715
717 assert(Callee == Cloner.ClonedFunc);
718
720 return isInlineViable(*Callee).isSuccess();
721
722 Function *Caller = CB.getCaller();
723 auto &CalleeTTI = GetTTI(*Callee);
724 bool RemarksEnabled =
725 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
726 DEBUG_TYPE);
727 InlineCost IC =
728 getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
729 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
730
731 if (IC.isAlways()) {
732 ORE.emit([&]() {
733 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
734 << NV("Callee", Cloner.OrigFunc)
735 << " should always be fully inlined, not partially";
736 });
737 return false;
738 }
739
740 if (IC.isNever()) {
741 ORE.emit([&]() {
742 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
743 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
744 << NV("Caller", Caller)
745 << " because it should never be inlined (cost=never)";
746 });
747 return false;
748 }
749
750 if (!IC) {
751 ORE.emit([&]() {
752 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
753 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
754 << NV("Caller", Caller) << " because too costly to inline (cost="
755 << NV("Cost", IC.getCost()) << ", threshold="
756 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
757 });
758 return false;
759 }
760 const DataLayout &DL = Caller->getDataLayout();
761
762 // The savings of eliminating the call:
763 int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
764 BlockFrequency NormWeightedSavings(NonWeightedSavings);
765
766 // Weighted saving is smaller than weighted cost, return false
767 if (NormWeightedSavings < WeightedOutliningRcost) {
768 ORE.emit([&]() {
769 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
770 &CB)
771 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
772 << NV("Caller", Caller) << " runtime overhead (overhead="
773 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
774 << ", savings="
775 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
776 << ")"
777 << " of making the outlined call is too high";
778 });
779
780 return false;
781 }
782
783 ORE.emit([&]() {
784 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
785 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
786 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
787 << " (threshold="
788 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
789 });
790 return true;
791}
792
793// TODO: Ideally we should share Inliner's InlineCost Analysis code.
794// For now use a simplified version. The returned 'InlineCost' will be used
795// to esimate the size cost as well as runtime cost of the BB.
797PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
798 TargetTransformInfo *TTI) {
799 InstructionCost InlineCost = 0;
800 const DataLayout &DL = BB->getDataLayout();
802 for (Instruction &I : *BB) {
803 // Skip free instructions.
804 switch (I.getOpcode()) {
805 case Instruction::BitCast:
806 case Instruction::PtrToInt:
807 case Instruction::IntToPtr:
808 case Instruction::Alloca:
809 case Instruction::PHI:
810 continue;
811 case Instruction::GetElementPtr:
812 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
813 continue;
814 break;
815 default:
816 break;
817 }
818
819 if (I.isLifetimeStartOrEnd())
820 continue;
821
822 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
823 Intrinsic::ID IID = II->getIntrinsicID();
825 FastMathFlags FMF;
826 for (Value *Val : II->args())
827 Tys.push_back(Val->getType());
828
829 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
830 FMF = FPMO->getFastMathFlags();
831
832 IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
834 continue;
835 }
836
837 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
838 InlineCost += getCallsiteCost(*TTI, *CI, DL);
839 continue;
840 }
841
842 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
843 InlineCost += getCallsiteCost(*TTI, *II, DL);
844 continue;
845 }
846
847 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
848 InlineCost += (SI->getNumCases() + 1) * InstrCost;
849 continue;
850 }
851 InlineCost += InstrCost;
852 }
853
854 return InlineCost;
855}
856
857std::tuple<InstructionCost, InstructionCost>
858PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
859 InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
860 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
861 Function *OutlinedFunc = FuncBBPair.first;
862 BasicBlock* OutliningCallBB = FuncBBPair.second;
863 // Now compute the cost of the call sequence to the outlined function
864 // 'OutlinedFunction' in BB 'OutliningCallBB':
865 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
866 OutliningFuncCallCost +=
867 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
868
869 // Now compute the cost of the extracted/outlined function itself:
870 for (BasicBlock &BB : *OutlinedFunc)
871 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
872 }
873 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
874 "Outlined function cost should be no less than the outlined region");
875
876 // The code extractor introduces a new root and exit stub blocks with
877 // additional unconditional branches. Those branches will be eliminated
878 // later with bb layout. The cost should be adjusted accordingly:
879 OutlinedFunctionCost -=
880 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
881
882 InstructionCost OutliningRuntimeOverhead =
883 OutliningFuncCallCost +
884 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
886
887 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
888}
889
890// Create the callsite to profile count map which is
891// used to update the original function's entry count,
892// after the function is partially inlined into the callsite.
893void PartialInlinerImpl::computeCallsiteToProfCountMap(
894 Function *DuplicateFunction,
895 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
896 std::vector<User *> Users(DuplicateFunction->user_begin(),
897 DuplicateFunction->user_end());
898 Function *CurrentCaller = nullptr;
899 std::unique_ptr<BlockFrequencyInfo> TempBFI;
900 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
901
902 auto ComputeCurrBFI = [&,this](Function *Caller) {
903 // For the old pass manager:
904 if (!GetBFI) {
905 DominatorTree DT(*Caller);
906 LoopInfo LI(DT);
907 BranchProbabilityInfo BPI(*Caller, LI);
908 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
909 CurrentCallerBFI = TempBFI.get();
910 } else {
911 // New pass manager:
912 CurrentCallerBFI = &(GetBFI(*Caller));
913 }
914 };
915
916 for (User *User : Users) {
917 CallBase *CB = getSupportedCallBase(User);
918 Function *Caller = CB->getCaller();
919 if (CurrentCaller != Caller) {
920 CurrentCaller = Caller;
921 ComputeCurrBFI(Caller);
922 } else {
923 assert(CurrentCallerBFI && "CallerBFI is not set");
924 }
925 BasicBlock *CallBB = CB->getParent();
926 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
927 if (Count)
928 CallSiteToProfCountMap[User] = *Count;
929 else
930 CallSiteToProfCountMap[User] = 0;
931 }
932}
933
934PartialInlinerImpl::FunctionCloner::FunctionCloner(
935 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
936 function_ref<AssumptionCache *(Function &)> LookupAC,
937 function_ref<TargetTransformInfo &(Function &)> GetTTI)
938 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
939 ClonedOI = std::make_unique<FunctionOutliningInfo>();
940
941 // Clone the function, so that we can hack away on it.
943 ClonedFunc = CloneFunction(F, VMap);
944
945 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
946 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
947 for (BasicBlock *BB : OI->Entries)
948 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
949
950 for (BasicBlock *E : OI->ReturnBlockPreds) {
951 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
952 ClonedOI->ReturnBlockPreds.push_back(NewE);
953 }
954 // Go ahead and update all uses to the duplicate, so that we can just
955 // use the inliner functionality when we're done hacking.
956 F->replaceAllUsesWith(ClonedFunc);
957}
958
959PartialInlinerImpl::FunctionCloner::FunctionCloner(
960 Function *F, FunctionOutliningMultiRegionInfo *OI,
964 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
965 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
966
967 // Clone the function, so that we can hack away on it.
969 ClonedFunc = CloneFunction(F, VMap);
970
971 // Go through all Outline Candidate Regions and update all BasicBlock
972 // information.
973 for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
974 OI->ORI) {
976 for (BasicBlock *BB : RegionInfo.Region)
977 Region.push_back(cast<BasicBlock>(VMap[BB]));
978
979 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
980 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
981 BasicBlock *NewReturnBlock = nullptr;
982 if (RegionInfo.ReturnBlock)
983 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
984 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
985 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
986 ClonedOMRI->ORI.push_back(MappedRegionInfo);
987 }
988 // Go ahead and update all uses to the duplicate, so that we can just
989 // use the inliner functionality when we're done hacking.
990 F->replaceAllUsesWith(ClonedFunc);
991}
992
993void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
994 auto GetFirstPHI = [](BasicBlock *BB) {
995 BasicBlock::iterator I = BB->begin();
996 PHINode *FirstPhi = nullptr;
997 while (I != BB->end()) {
998 PHINode *Phi = dyn_cast<PHINode>(I);
999 if (!Phi)
1000 break;
1001 if (!FirstPhi) {
1002 FirstPhi = Phi;
1003 break;
1004 }
1005 }
1006 return FirstPhi;
1007 };
1008
1009 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1010 // blocks.
1011 if (!ClonedOI)
1012 return;
1013
1014 // Special hackery is needed with PHI nodes that have inputs from more than
1015 // one extracted block. For simplicity, just split the PHIs into a two-level
1016 // sequence of PHIs, some of which will go in the extracted region, and some
1017 // of which will go outside.
1018 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1019 // only split block when necessary:
1020 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1021 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1022
1023 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1024 return;
1025
1026 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1027 if (llvm::all_equal(PN->incoming_values()))
1028 return PN->getIncomingValue(0);
1029 return nullptr;
1030 };
1031
1032 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1033 ClonedOI->ReturnBlock->getFirstNonPHIIt());
1034 BasicBlock::iterator I = PreReturn->begin();
1035 BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
1036 SmallVector<Instruction *, 4> DeadPhis;
1037 while (I != PreReturn->end()) {
1038 PHINode *OldPhi = dyn_cast<PHINode>(I);
1039 if (!OldPhi)
1040 break;
1041
1042 PHINode *RetPhi =
1043 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
1044 RetPhi->insertBefore(Ins);
1045 OldPhi->replaceAllUsesWith(RetPhi);
1046 Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
1047
1048 RetPhi->addIncoming(&*I, PreReturn);
1049 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1050 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1051 OldPhi->removeIncomingValue(E);
1052 }
1053
1054 // After incoming values splitting, the old phi may become trivial.
1055 // Keeping the trivial phi can introduce definition inside the outline
1056 // region which is live-out, causing necessary overhead (load, store
1057 // arg passing etc).
1058 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1059 OldPhi->replaceAllUsesWith(OldPhiVal);
1060 DeadPhis.push_back(OldPhi);
1061 }
1062 ++I;
1063 }
1064 for (auto *DP : DeadPhis)
1065 DP->eraseFromParent();
1066
1067 for (auto *E : ClonedOI->ReturnBlockPreds)
1068 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1069}
1070
1071bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1072
1073 auto ComputeRegionCost =
1074 [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1076 for (BasicBlock* BB : Region)
1077 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1078 return Cost;
1079 };
1080
1081 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1082
1083 if (ClonedOMRI->ORI.empty())
1084 return false;
1085
1086 // The CodeExtractor needs a dominator tree.
1087 DominatorTree DT;
1088 DT.recalculate(*ClonedFunc);
1089
1090 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1091 LoopInfo LI(DT);
1092 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1093 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1094
1095 // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1096 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1097
1098 SetVector<Value *> Inputs, Outputs, Sinks;
1099 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1100 ClonedOMRI->ORI) {
1101 InstructionCost CurrentOutlinedRegionCost =
1102 ComputeRegionCost(RegionInfo.Region);
1103
1104 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1105 ClonedFuncBFI.get(), &BPI,
1106 LookupAC(*RegionInfo.EntryBlock->getParent()),
1107 /* AllowVarargs */ false);
1108
1109 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1110
1111 LLVM_DEBUG({
1112 dbgs() << "inputs: " << Inputs.size() << "\n";
1113 dbgs() << "outputs: " << Outputs.size() << "\n";
1114 for (Value *value : Inputs)
1115 dbgs() << "value used in func: " << *value << "\n";
1116 for (Value *output : Outputs)
1117 dbgs() << "instr used in func: " << *output << "\n";
1118 });
1119
1120 // Do not extract regions that have live exit variables.
1121 if (Outputs.size() > 0 && !ForceLiveExit)
1122 continue;
1123
1124 if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1125 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1126 BasicBlock *OutliningCallBB = OCS->getParent();
1127 assert(OutliningCallBB->getParent() == ClonedFunc);
1128 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1129 NumColdRegionsOutlined++;
1130 OutlinedRegionCost += CurrentOutlinedRegionCost;
1131
1132 if (MarkOutlinedColdCC) {
1133 OutlinedFunc->setCallingConv(CallingConv::Cold);
1134 OCS->setCallingConv(CallingConv::Cold);
1135 }
1136 } else
1137 ORE.emit([&]() {
1138 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1139 &RegionInfo.Region.front()->front())
1140 << "Failed to extract region at block "
1141 << ore::NV("Block", RegionInfo.Region.front());
1142 });
1143 }
1144
1145 return !OutlinedFunctions.empty();
1146}
1147
1148Function *
1149PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1150 // Returns true if the block is to be partial inlined into the caller
1151 // (i.e. not to be extracted to the out of line function)
1152 auto ToBeInlined = [&, this](BasicBlock *BB) {
1153 return BB == ClonedOI->ReturnBlock ||
1154 llvm::is_contained(ClonedOI->Entries, BB);
1155 };
1156
1157 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1158 // The CodeExtractor needs a dominator tree.
1159 DominatorTree DT;
1160 DT.recalculate(*ClonedFunc);
1161
1162 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1163 LoopInfo LI(DT);
1164 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1165 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1166
1167 // Gather up the blocks that we're going to extract.
1168 std::vector<BasicBlock *> ToExtract;
1169 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1170 ToExtract.push_back(ClonedOI->NonReturnBlock);
1171 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1172 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1173 for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
1174 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1175 ToExtract.push_back(BB);
1176 // FIXME: the code extractor may hoist/sink more code
1177 // into the outlined function which may make the outlining
1178 // overhead (the difference of the outlined function cost
1179 // and OutliningRegionCost) look larger.
1180 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1181 }
1182
1183 // Extract the body of the if.
1184 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1185 Function *OutlinedFunc =
1186 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1187 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1188 /* AllowVarargs */ true)
1189 .extractCodeRegion(CEAC);
1190
1191 if (OutlinedFunc) {
1192 BasicBlock *OutliningCallBB =
1193 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1194 assert(OutliningCallBB->getParent() == ClonedFunc);
1195 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1196 } else
1197 ORE.emit([&]() {
1198 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1199 &ToExtract.front()->front())
1200 << "Failed to extract region at block "
1201 << ore::NV("Block", ToExtract.front());
1202 });
1203
1204 return OutlinedFunc;
1205}
1206
1207PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1208 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1209 // users (function pointers, etc.) back to the original function.
1210 ClonedFunc->replaceAllUsesWith(OrigFunc);
1211 ClonedFunc->eraseFromParent();
1212 if (!IsFunctionInlined) {
1213 // Remove each function that was speculatively created if there is no
1214 // reference.
1215 for (auto FuncBBPair : OutlinedFunctions) {
1216 Function *Func = FuncBBPair.first;
1217 Func->eraseFromParent();
1218 }
1219 }
1220}
1221
1222std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1223 if (F.hasAddressTaken())
1224 return {false, nullptr};
1225
1226 // Let inliner handle it
1227 if (F.hasFnAttribute(Attribute::AlwaysInline))
1228 return {false, nullptr};
1229
1230 if (F.hasFnAttribute(Attribute::NoInline))
1231 return {false, nullptr};
1232
1233 if (PSI.isFunctionEntryCold(&F))
1234 return {false, nullptr};
1235
1236 if (F.users().empty())
1237 return {false, nullptr};
1238
1239 OptimizationRemarkEmitter ORE(&F);
1240
1241 // Only try to outline cold regions if we have a profile summary, which
1242 // implies we have profiling information.
1243 if (PSI.hasProfileSummary() && F.hasProfileData() &&
1245 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1246 computeOutliningColdRegionsInfo(F, ORE);
1247 if (OMRI) {
1248 FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1249
1250 LLVM_DEBUG({
1251 dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1252 dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1253 << "\n";
1254 });
1255
1256 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1257
1258 if (DidOutline) {
1259 LLVM_DEBUG({
1260 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1261 Cloner.ClonedFunc->print(dbgs());
1262 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1263 });
1264
1265 if (tryPartialInline(Cloner))
1266 return {true, nullptr};
1267 }
1268 }
1269 }
1270
1271 // Fall-thru to regular partial inlining if we:
1272 // i) can't find any cold regions to outline, or
1273 // ii) can't inline the outlined function anywhere.
1274 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1275 if (!OI)
1276 return {false, nullptr};
1277
1278 FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1279 Cloner.normalizeReturnBlock();
1280
1281 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1282
1283 if (!OutlinedFunction)
1284 return {false, nullptr};
1285
1286 if (tryPartialInline(Cloner))
1287 return {true, OutlinedFunction};
1288
1289 return {false, nullptr};
1290}
1291
1292bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1293 if (Cloner.OutlinedFunctions.empty())
1294 return false;
1295
1296 auto OutliningCosts = computeOutliningCosts(Cloner);
1297
1298 InstructionCost SizeCost = std::get<0>(OutliningCosts);
1299 InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1300
1301 assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1302 "Expected valid costs");
1303
1304 // Only calculate RelativeToEntryFreq when we are doing single region
1305 // outlining.
1306 BranchProbability RelativeToEntryFreq;
1307 if (Cloner.ClonedOI)
1308 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1309 else
1310 // RelativeToEntryFreq doesn't make sense when we have more than one
1311 // outlined call because each call will have a different relative frequency
1312 // to the entry block. We can consider using the average, but the
1313 // usefulness of that information is questionable. For now, assume we never
1314 // execute the calls to outlined functions.
1315 RelativeToEntryFreq = BranchProbability(0, 1);
1316
1317 BlockFrequency WeightedRcost =
1318 BlockFrequency(NonWeightedRcost.getValue()) * RelativeToEntryFreq;
1319
1320 // The call sequence(s) to the outlined function(s) are larger than the sum of
1321 // the original outlined region size(s), it does not increase the chances of
1322 // inlining the function with outlining (The inliner uses the size increase to
1323 // model the cost of inlining a callee).
1324 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1325 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1326 DebugLoc DLoc;
1328 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1329 OrigFuncORE.emit([&]() {
1330 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1331 DLoc, Block)
1332 << ore::NV("Function", Cloner.OrigFunc)
1333 << " not partially inlined into callers (Original Size = "
1334 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1335 << ", Size of call sequence to outlined function = "
1336 << ore::NV("NewSize", SizeCost) << ")";
1337 });
1338 return false;
1339 }
1340
1341 assert(Cloner.OrigFunc->users().empty() &&
1342 "F's users should all be replaced!");
1343
1344 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1345 Cloner.ClonedFunc->user_end());
1346
1347 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1348 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1349 if (CalleeEntryCount)
1350 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1351
1352 uint64_t CalleeEntryCountV =
1353 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1354
1355 bool AnyInline = false;
1356 for (User *User : Users) {
1357 CallBase *CB = getSupportedCallBase(User);
1358
1359 if (isLimitReached())
1360 continue;
1361
1362 OptimizationRemarkEmitter CallerORE(CB->getCaller());
1363 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1364 continue;
1365
1366 // Construct remark before doing the inlining, as after successful inlining
1367 // the callsite is removed.
1368 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1369 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1370 << ore::NV("Caller", CB->getCaller());
1371
1372 InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1373 // We can only forward varargs when we outlined a single region, else we
1374 // bail on vararg functions.
1375 if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
1376 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1377 : nullptr))
1378 .isSuccess())
1379 continue;
1380
1381 CallerORE.emit(OR);
1382
1383 // Now update the entry count:
1384 if (CalleeEntryCountV) {
1385 if (auto It = CallSiteToProfCountMap.find(User);
1386 It != CallSiteToProfCountMap.end()) {
1387 uint64_t CallSiteCount = It->second;
1388 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1389 }
1390 }
1391
1392 AnyInline = true;
1393 NumPartialInlining++;
1394 // Update the stats
1395 if (Cloner.ClonedOI)
1396 NumPartialInlined++;
1397 else
1398 NumColdOutlinePartialInlined++;
1399 }
1400
1401 if (AnyInline) {
1402 Cloner.IsFunctionInlined = true;
1403 if (CalleeEntryCount)
1404 Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1405 CalleeEntryCountV, CalleeEntryCount->getType()));
1406 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1407 OrigFuncORE.emit([&]() {
1408 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1409 << "Partially inlined into at least one caller";
1410 });
1411 }
1412
1413 return AnyInline;
1414}
1415
1416bool PartialInlinerImpl::run(Module &M) {
1418 return false;
1419
1420 std::vector<Function *> Worklist;
1421 Worklist.reserve(M.size());
1422 for (Function &F : M)
1423 if (!F.use_empty() && !F.isDeclaration())
1424 Worklist.push_back(&F);
1425
1426 bool Changed = false;
1427 while (!Worklist.empty()) {
1428 Function *CurrFunc = Worklist.back();
1429 Worklist.pop_back();
1430
1431 if (CurrFunc->use_empty())
1432 continue;
1433
1434 std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1435 if (Result.second)
1436 Worklist.push_back(Result.second);
1437 Changed |= Result.first;
1438 }
1439
1440 return Changed;
1441}
1442
1445 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1446
1447 auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1448 return FAM.getResult<AssumptionAnalysis>(F);
1449 };
1450
1451 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1452 return FAM.getCachedResult<AssumptionAnalysis>(F);
1453 };
1454
1455 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1456 return FAM.getResult<BlockFrequencyAnalysis>(F);
1457 };
1458
1459 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1460 return FAM.getResult<TargetIRAnalysis>(F);
1461 };
1462
1463 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1464 return FAM.getResult<TargetLibraryAnalysis>(F);
1465 };
1466
1468
1469 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1470 GetTLI, PSI, GetBFI)
1471 .run(M))
1472 return PreservedAnalyses::none();
1473 return PreservedAnalyses::all();
1474}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
cl::opt< unsigned > MinBlockCounterExecution
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
uint64_t IntrinsicInst * II
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:472
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
Conditional Branch instruction.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
void setCallingConv(CallingConv::ID CC)
Definition Function.h:276
int getCost() const
Get the inline cost estimate.
Definition InlineCost.h:146
bool isAlways() const
Definition InlineCost.h:140
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition InlineCost.h:176
bool isNever() const
Definition InlineCost.h:141
bool isSuccess() const
Definition InlineCost.h:190
auto map(const Function &F) const -> InstructionCost
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
LLVM_ABI Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:403
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
bool use_empty() const
Definition Value.h:347
user_iterator user_end()
Definition Value.h:411
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BR
Control flow instructions. These all have token chains.
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI int getInstrCost()
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
TargetTransformInfo TTI
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition STLExtras.h:2166
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39