//===- FunctionSpecialization.cpp - Function Specialization ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass specialises functions with constant parameters. Constant
// parameters like function pointers and constant globals are propagated to
// the callee by specializing the function. The main benefit of this pass at
// the moment is that indirect calls are transformed into direct calls, which
// provides inlining opportunities that the inliner would not have been able
// to achieve. That is why function specialisation is run before the inliner
// in the optimisation pipeline; this is by design. Otherwise, we would only
// benefit from constant argument passing, which is a valid use-case too, but
// hasn't been explored much in terms of performance uplifts, cost-model and
// compile-time impact. See the sketch following this header for an example
// of the transformation.
//
// Current limitations:
// - It does not yet handle integer ranges. We do support "literal constants",
//   but that's off by default under an option.
// - The cost-model could be further looked into (it mainly focuses on
//   inlining benefits).
//
// Ideas:
// - With a function specialization attribute for arguments, we could have
//   a direct way to steer function specialization, avoiding the cost-model,
//   and thus control compile-times / code-size.
//
// Todos:
// - Specializing recursive functions relies on running the transformation a
//   number of times, which is controlled by the option
//   `func-specialization-max-iters`. Thus, increasing the number of
//   iterations will linearly increase the number of times recursive functions
//   get specialized; see also the discussion in
//   https://reviews.llvm.org/D106426 for details. Perhaps there is a
//   compile-time friendlier way to control/limit the number of
//   specialisations for recursive functions.
// - Don't transform the function if function specialization does not trigger;
//   the SCCPSolver may make IR changes.
//
// References:
// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
//   it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
//
//===----------------------------------------------------------------------===//
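// An illustrative sketch of the transformation (the function and value names
// below are made up for exposition; they are not taken from a real test).
// Given a callee that calls through a function-pointer argument:
//
//   define internal i32 @compute(i32 %x, ptr %binop) {
//     %r = call i32 %binop(i32 %x, i32 %x)
//     ret i32 %r
//   }
//
//   %y = call i32 @compute(i32 %v, ptr @mul)
//
// specialising @compute on %binop == @mul yields a clone in which the
// indirect call is resolved to a direct call, which the inliner can then
// consider:
//
//   define internal i32 @compute.1(i32 %x, ptr %binop) {
//     %r = call i32 @mul(i32 %x, i32 %x)
//     ret i32 %r
//   }
//
//   %y = call i32 @compute.1(i32 %v, ptr @mul)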

#include "llvm/Transforms/IPO/FunctionSpecialization.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SCCPSolver.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>

using namespace llvm;

#define DEBUG_TYPE "function-specialization"

STATISTIC(NumSpecsCreated, "Number of specializations created");

static cl::opt<bool> ForceSpecialization(
    "force-specialization", cl::init(false), cl::Hidden, cl::desc(
    "Force function specialization for every call site with a constant "
    "argument"));

static cl::opt<unsigned> MaxClones(
    "funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
    "The maximum number of clones allowed for a single function "
    "specialization"));

static cl::opt<unsigned> MinFunctionSize(
    "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc(
    "Don't specialize functions that have less than this number of "
    "instructions"));

static cl::opt<unsigned> AvgLoopIters(
    "funcspec-avg-loop-iters", cl::init(10), cl::Hidden, cl::desc(
    "Average loop iteration count"));

static cl::opt<bool> SpecializeOnAddress(
    "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
    "Enable function specialization on the address of global values"));

// Disabled by default as it can significantly increase compilation times.
//
// https://llvm-compile-time-tracker.com
// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> SpecializeLiteralConstant(
    "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
    "Enable specialization of functions that take a literal constant as an "
    "argument"));

Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
                                                   CallInst *Call) {
  Value *StoreValue = nullptr;
  for (auto *User : Alloca->users()) {
    // We can't use llvm::isAllocaPromotable() as that would fail because of
    // the usage in the CallInst, which is what we check here.
    if (User == Call)
      continue;
    if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
      if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
        return nullptr;
      continue;
    }

    if (auto *Store = dyn_cast<StoreInst>(User)) {
      // This is a duplicate store, bail out.
      if (StoreValue || Store->isVolatile())
        return nullptr;
      StoreValue = Store->getValueOperand();
      continue;
    }
    // Bail if there is any other unknown usage.
    return nullptr;
  }

  if (!StoreValue)
    return nullptr;

  return getCandidateConstant(StoreValue);
}

// A constant stack value is an AllocaInst that has a single constant
// value stored to it. Return this constant if such an alloca stack value
// is a function argument.
Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call,
                                                     Value *Val) {
  if (!Val)
    return nullptr;
  Val = Val->stripPointerCasts();
  if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
    return ConstVal;
  auto *Alloca = dyn_cast<AllocaInst>(Val);
  if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
    return nullptr;
  return getPromotableAlloca(Alloca, Call);
}

// To support specializing recursive functions, it is important to propagate
// constant arguments because after a first iteration of specialisation, a
// reduced example may look like this:
//
//   define internal void @RecursiveFn(i32* %arg1) {
//     %temp = alloca i32, align 4
//     store i32 2, i32* %temp, align 4
//     call void @RecursiveFn.1(i32* nonnull %temp)
//     ret void
//   }
//
// Before a next iteration, we need to propagate the constant like so,
// which allows further specialization in the next iterations:
//
//   @funcspec.arg = internal constant i32 2
//
//   define internal void @RecursiveFn(i32* %arg1) {
//     call void @RecursiveFn.1(i32* nonnull @funcspec.arg)
//     ret void
//   }
//
void FunctionSpecializer::promoteConstantStackValues() {
  // Iterate over the argument tracked functions to see if there
  // are any new constant values for the call instruction via
  // stack variables.
  for (Function &F : M) {
    if (!Solver.isArgumentTrackedFunction(&F))
      continue;

    for (auto *User : F.users()) {

      auto *Call = dyn_cast<CallInst>(User);
      if (!Call)
        continue;

      if (!Solver.isBlockExecutable(Call->getParent()))
        continue;

      bool Changed = false;
      for (const Use &U : Call->args()) {
        unsigned Idx = Call->getArgOperandNo(&U);
        Value *ArgOp = Call->getArgOperand(Idx);
        Type *ArgOpType = ArgOp->getType();

        if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
          continue;

        auto *ConstVal = getConstantStackValue(Call, ArgOp);
        if (!ConstVal)
          continue;

        Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
                                       GlobalValue::InternalLinkage, ConstVal,
                                       "funcspec.arg");
        if (ArgOpType != ConstVal->getType())
          GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);

        Call->setArgOperand(Idx, GV);
        Changed = true;
      }

      // Add the changed CallInst to Solver Worklist
      if (Changed)
        Solver.visitCall(*Call);
    }
  }
}

// ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
// interfere with the promoteConstantStackValues() optimization.
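// For illustration only (the value names here are made up): after solving, a
// use of a value %x may have been rewritten to go through a copy such as
//
//   %x.copy = call i32 @llvm.ssa.copy.i32(i32 %x)
//
// Stripping these copies restores direct uses of the original values before
// promoteConstantStackValues() inspects the IR.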
static void removeSSACopy(Function &F) {
  for (BasicBlock &BB : F) {
    for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
      auto *II = dyn_cast<IntrinsicInst>(&Inst);
      if (!II)
        continue;
      if (II->getIntrinsicID() != Intrinsic::ssa_copy)
        continue;
      Inst.replaceAllUsesWith(II->getOperand(0));
      Inst.eraseFromParent();
    }
  }
}

/// Remove any ssa_copy intrinsics that may have been introduced.
void FunctionSpecializer::cleanUpSSA() {
  for (Function *F : Specializations)
    removeSSACopy(*F);
}

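// DenseMap traits for SpecSig, so specialisation signatures can be used as
// keys when deduplicating specialisations below. The empty and tombstone
// sentinels use the reserved key values ~0U and ~1U.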
template <> struct llvm::DenseMapInfo<SpecSig> {
  static inline SpecSig getEmptyKey() { return {~0U, {}}; }

  static inline SpecSig getTombstoneKey() { return {~1U, {}}; }

  static unsigned getHashValue(const SpecSig &S) {
    return static_cast<unsigned>(hash_value(S));
  }

  static bool isEqual(const SpecSig &LHS, const SpecSig &RHS) {
    return LHS == RHS;
  }
};

FunctionSpecializer::~FunctionSpecializer() {
  LLVM_DEBUG(
      if (NumSpecsCreated > 0)
        dbgs() << "FnSpecialization: Created " << NumSpecsCreated
               << " specializations in module " << M.getName() << "\n");
  // Eliminate dead code.
  removeDeadFunctions();
  cleanUpSSA();
}

/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
/// \returns true if at least one function is specialized.
bool FunctionSpecializer::run() {
  // Find possible specializations for each function.
  SpecMap SM;
  SmallVector<Spec, 32> AllSpecs;
  unsigned NumCandidates = 0;
  for (Function &F : M) {
    if (!isCandidateFunction(&F))
      continue;

    auto Cost = getSpecializationCost(&F);
    if (!Cost.isValid()) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for "
                        << F.getName() << "\n");
      continue;
    }

    LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
                      << F.getName() << " is " << Cost << "\n");

    if (!findSpecializations(&F, Cost, AllSpecs, SM)) {
      LLVM_DEBUG(
          dbgs() << "FnSpecialization: No possible specializations found for "
                 << F.getName() << "\n");
      continue;
    }

    ++NumCandidates;
  }

  if (!NumCandidates) {
    LLVM_DEBUG(
        dbgs()
        << "FnSpecialization: No possible specializations found in module\n");
    return false;
  }

  // Choose the most profitable specialisations that fit in the module
  // specialization budget, which is derived from the maximum number of
  // specializations per candidate function.
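  // For example (illustrative numbers, not from a real run): with the default
  // funcspec-max-clones of 3 and two candidate functions, at most
  // min(2 * 3, AllSpecs.size()) specializations are kept, ordered by
  // decreasing gain.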
  auto CompareGain = [&AllSpecs](unsigned I, unsigned J) {
    return AllSpecs[I].Gain > AllSpecs[J].Gain;
  };
  const unsigned NSpecs =
      std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
  SmallVector<unsigned> BestSpecs(NSpecs + 1);
  std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
  if (AllSpecs.size() > NSpecs) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
                      << "the maximum number of clones threshold.\n"
                      << "FnSpecialization: Specializing the "
                      << NSpecs
                      << " most profitable candidates.\n");
    std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareGain);
    for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
      BestSpecs[NSpecs] = I;
      std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
      std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
    }
  }

  LLVM_DEBUG(dbgs() << "FnSpecialization: List of specializations \n";
             for (unsigned I = 0; I < NSpecs; ++I) {
               const Spec &S = AllSpecs[BestSpecs[I]];
               dbgs() << "FnSpecialization: Function " << S.F->getName()
                      << " , gain " << S.Gain << "\n";
               for (const ArgInfo &Arg : S.Sig.Args)
                 dbgs() << "FnSpecialization: FormalArg = "
                        << Arg.Formal->getNameOrAsOperand()
                        << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
                        << "\n";
             });

  // Create the chosen specializations.
  SmallPtrSet<Function *, 8> OriginalFuncs;
  SmallVector<Function *> Clones;
  for (unsigned I = 0; I < NSpecs; ++I) {
    Spec &S = AllSpecs[BestSpecs[I]];
    S.Clone = createSpecialization(S.F, S.Sig);

    // Update the known call sites to call the clone.
    for (CallBase *Call : S.CallSites) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
                        << " to call " << S.Clone->getName() << "\n");
      Call->setCalledFunction(S.Clone);
    }

    Clones.push_back(S.Clone);
    OriginalFuncs.insert(S.F);
  }

  Solver.solveWhileResolvedUndefsIn(Clones);

  // Update the rest of the call sites - these are the recursive calls, calls
  // to discarded specialisations and calls that may match a specialisation
  // after the solver runs.
  for (Function *F : OriginalFuncs) {
    auto [Begin, End] = SM[F];
    updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
  }

  promoteConstantStackValues();
  return true;
}

void FunctionSpecializer::removeDeadFunctions() {
  for (Function *F : FullySpecialized) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
                      << F->getName() << "\n");
    if (FAM)
      FAM->clear(*F, F->getName());
    F->eraseFromParent();
  }
  FullySpecialized.clear();
}

// Compute the code metrics for function \p F.
CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) {
  auto I = FunctionMetrics.insert({F, CodeMetrics()});
  CodeMetrics &Metrics = I.first->second;
  if (I.second) {
    // The code metrics were not cached.
    SmallPtrSet<const Value *, 32> EphValues;
    CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
    for (BasicBlock &BB : *F)
      Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);

    LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
                      << F->getName() << " is " << Metrics.NumInsts
                      << " instructions\n");
  }
  return Metrics;
}

/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
static Function *cloneCandidateFunction(Function *F) {
  ValueToValueMapTy Mappings;
  Function *Clone = CloneFunction(F, Mappings);
  removeSSACopy(*Clone);
  return Clone;
}

bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
                                              SmallVectorImpl<Spec> &AllSpecs,
                                              SpecMap &SM) {
  // A mapping from a specialisation signature to the index of the respective
  // entry in the all-specialisations array. Used to ensure uniqueness of
  // specialisations.
  DenseMap<SpecSig, unsigned> UM;

  // Get a list of interesting arguments.
  SmallVector<Argument *> Args;
  for (Argument &Arg : F->args())
    if (isArgumentInteresting(&Arg))
      Args.push_back(&Arg);

  if (Args.empty())
    return false;

  bool Found = false;
  for (User *U : F->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto &CS = *cast<CallBase>(U);

    // The user instruction does not call our function.
    if (CS.getCalledFunction() != F)
      continue;

    // If the call site has attribute minsize set, that callsite won't be
    // specialized.
    if (CS.hasFnAttr(Attribute::MinSize))
      continue;

    // If the parent of the call site will never be executed, we don't need
    // to worry about the passed value.
    if (!Solver.isBlockExecutable(CS.getParent()))
      continue;

    // Examine arguments and create a specialisation candidate from the
    // constant operands of this call site.
    SpecSig S;
    for (Argument *A : Args) {
      Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
      if (!C)
        continue;
      LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
                        << A->getName() << " : " << C->getNameOrAsOperand()
                        << "\n");
      S.Args.push_back({A, C});
    }

    if (S.Args.empty())
      continue;

    // Check if we have encountered the same specialisation already.
    if (auto It = UM.find(S); It != UM.end()) {
      // Existing specialisation. Add the call to the list to rewrite, unless
      // it's a recursive call. A specialisation generated because of a
      // recursive call may end up as not the best specialisation for all
      // the cloned instances of this call, which result from specialising
      // functions. Hence we don't rewrite the call directly, but match it with
      // the best specialisation once all specialisations are known.
      if (CS.getFunction() == F)
        continue;
      const unsigned Index = It->second;
      AllSpecs[Index].CallSites.push_back(&CS);
    } else {
      // Calculate the specialisation gain.
      InstructionCost Gain = 0 - Cost;
      for (ArgInfo &A : S.Args)
        Gain +=
            getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F));

      // Discard unprofitable specialisations.
      if (!ForceSpecialization && Gain <= 0)
        continue;

      // Create a new specialisation entry.
      auto &Spec = AllSpecs.emplace_back(F, S, Gain);
      if (CS.getFunction() != F)
        Spec.CallSites.push_back(&CS);
      const unsigned Index = AllSpecs.size() - 1;
      UM[S] = Index;
      if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
        It->second.second = Index + 1;
      Found = true;
    }
  }

  return Found;
}

bool FunctionSpecializer::isCandidateFunction(Function *F) {
  if (F->isDeclaration())
    return false;

  if (F->hasFnAttribute(Attribute::NoDuplicate))
    return false;

  if (!Solver.isArgumentTrackedFunction(F))
    return false;

  // Do not specialize the cloned function again.
  if (Specializations.contains(F))
    return false;

  // If we're optimizing the function for size, we shouldn't specialize it.
  if (F->hasOptSize() ||
      shouldOptimizeForSize(F, nullptr, nullptr))
    return false;

  // Exit if the function is not executable. There's no point in specializing
  // a dead function.
  if (!Solver.isBlockExecutable(&F->getEntryBlock()))
    return false;

  // There is no point in specializing a function that would eventually get
  // inlined anyway.
  if (F->hasFnAttribute(Attribute::AlwaysInline))
    return false;

  LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
                    << "\n");
  return true;
}

Function *FunctionSpecializer::createSpecialization(Function *F,
                                                    const SpecSig &S) {
  Function *Clone = cloneCandidateFunction(F);

  // Initialize the lattice state of the arguments of the function clone,
  // marking the argument on which we specialized the function constant
  // with the given value.
  Solver.markArgInFuncSpecialization(Clone, S.Args);

  Solver.addArgumentTrackedFunction(Clone);
  Solver.markBlockExecutable(&Clone->front());

  // Mark all the specialized functions.
  Specializations.insert(Clone);
  ++NumSpecsCreated;

  return Clone;
}

/// Compute and return the cost of specializing function \p F.
InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) {
  CodeMetrics &Metrics = analyzeFunction(F);
  // If the code metrics reveal that we shouldn't duplicate the function, we
  // shouldn't specialize it. Set the specialization cost to Invalid.
  // Or if the instruction count suggests that the function would easily get
  // inlined, in which case we shouldn't specialize it either.
  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
      (!ForceSpecialization && !F->hasFnAttribute(Attribute::NoInline) &&
       Metrics.NumInsts < MinFunctionSize))
    return InstructionCost::getInvalid();

  // Otherwise, set the specialization cost to be the cost of all the
  // instructions in the function.
  return Metrics.NumInsts * InlineConstants::getInstrCost();
}
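
// A worked example for the cost above (illustrative numbers only, assuming
// the default inline instruction cost of 5): a candidate with 120 instructions
// gets a specialization cost of 120 * 5 = 600, which the per-argument bonuses
// computed in findSpecializations() must exceed for a specialization to be
// created.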

static InstructionCost getUserBonus(User *U, TargetTransformInfo &TTI,
                                    const LoopInfo &LI) {
  auto *I = dyn_cast_or_null<Instruction>(U);
  // If not an instruction we do not know how to evaluate.
  // Keep minimum possible cost for now so that it doesn't affect
  // specialization.
  if (!I)
    return std::numeric_limits<unsigned>::min();

  InstructionCost Cost =
      TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency);

  // Increase the cost if it is inside the loop.
  unsigned LoopDepth = LI.getLoopDepth(I->getParent());
  Cost *= std::pow((double)AvgLoopIters, LoopDepth);
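  // For example (illustrative numbers): with the default
  // funcspec-avg-loop-iters of 10, a user at loop depth 2 has its cost
  // scaled by 10^2 = 100.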

  // Traverse recursively if there are more uses.
  // TODO: Any other instructions to be added here?
  if (I->mayReadFromMemory() || I->isCast())
    for (auto *User : I->users())
      Cost += getUserBonus(User, TTI, LI);

  return Cost;
}

/// Compute a bonus for replacing argument \p A with constant \p C.
InstructionCost
FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
                                            const LoopInfo &LI) {
  Function *F = A->getParent();
  auto &TTI = (GetTTI)(*F);
  LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                    << C->getNameOrAsOperand() << "\n");

  InstructionCost TotalCost = 0;
  for (auto *U : A->users()) {
    TotalCost += getUserBonus(U, TTI, LI);
    LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
               TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
  }

  // The below heuristic is only concerned with exposing inlining
  // opportunities via indirect call promotion. If the argument is not a
  // (potentially casted) function pointer, give up.
  Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
  if (!CalledFunction)
    return TotalCost;

  // Get TTI for the called function (used for the inline cost).
  auto &CalleeTTI = (GetTTI)(*CalledFunction);

  // Look at all the call sites whose called value is the argument.
  // Specializing the function on the argument would allow these indirect
  // calls to be promoted to direct calls. If the indirect call promotion
  // would likely enable the called function to be inlined, specializing is a
  // good idea.
  int Bonus = 0;
  for (User *U : A->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto *CS = cast<CallBase>(U);
    if (CS->getCalledOperand() != A)
      continue;
    if (CS->getFunctionType() != CalledFunction->getFunctionType())
      continue;

    // Get the cost of inlining the called function at this call site. Note
    // that this is only an estimate. The called function may eventually
    // change in a way that leads to it not being inlined here, even though
    // inlining looks profitable now. For example, one of its called
    // functions may be inlined into it, making the called function too large
    // to be inlined into this call site.
    //
    // We apply a boost for performing indirect call promotion by increasing
    // the default threshold by the threshold for indirect calls.
    auto Params = getInlineParams();
    Params.DefaultThreshold += InlineConstants::IndirectCallThreshold;
    InlineCost IC =
        getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);

    // We clamp the bonus for this call to be between zero and the default
    // threshold.
    if (IC.isAlways())
      Bonus += Params.DefaultThreshold;
    else if (IC.isVariable() && IC.getCostDelta() > 0)
      Bonus += IC.getCostDelta();

    LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
                      << " for user " << *U << "\n");
  }

  return TotalCost + Bonus;
}

/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
  // No point in specialization if the argument is unused.
  if (A->user_empty())
    return false;

  // For now, don't attempt to specialize functions based on the values of
  // composite types.
  Type *ArgTy = A->getType();
  if (!ArgTy->isSingleValueType())
    return false;

  // Specialization of integer and floating point types needs to be explicitly
  // enabled.
  if (!SpecializeLiteralConstant &&
      (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy()))
    return false;

  // SCCP solver does not record an argument that will be constructed on
  // the stack.
  if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory())
    return false;

  // Check the lattice value and decide if we should attempt to specialize
  // based on this argument. No point in specialization if the lattice value
  // is already a constant.
  const ValueLatticeElement &LV = Solver.getLatticeValueFor(A);
  if (LV.isUnknownOrUndef() || LV.isConstant() ||
      (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, parameter "
                      << A->getNameOrAsOperand() << " is already constant\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting parameter "
                    << A->getNameOrAsOperand() << "\n");

  return true;
}

/// Check if the value \p V (an actual argument) is a constant or can only
/// have a constant value. Return that constant.
Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
  if (isa<PoisonValue>(V))
    return nullptr;

  // TrackValueOfGlobalVariable only tracks scalar global variables.
  if (auto *GV = dyn_cast<GlobalVariable>(V)) {
    // Check if we want to specialize on the address of non-constant
    // global values.
    if (!GV->isConstant() && !SpecializeOnAddress)
      return nullptr;

    if (!GV->getValueType()->isSingleValueType())
      return nullptr;
  }

  // Select for possible specialisation values that are constants or
  // are deduced to be constants or constant ranges with a single element.
  Constant *C = dyn_cast<Constant>(V);
  if (!C) {
    const ValueLatticeElement &LV = Solver.getLatticeValueFor(V);
    if (LV.isConstant())
      C = LV.getConstant();
    else if (LV.isConstantRange() && LV.getConstantRange().isSingleElement()) {
      assert(V->getType()->isIntegerTy() && "Non-integral constant range");
      C = Constant::getIntegerValue(V->getType(),
                                    *LV.getConstantRange().getSingleElement());
    } else
      return nullptr;
  }

  return C;
}

void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
                                          const Spec *End) {
  // Collect the call sites that need updating.
  SmallVector<CallBase *> ToUpdate;
  for (User *U : F->users())
    if (auto *CS = dyn_cast<CallBase>(U);
        CS && CS->getCalledFunction() == F &&
        Solver.isBlockExecutable(CS->getParent()))
      ToUpdate.push_back(CS);

  unsigned NCallsLeft = ToUpdate.size();
  for (CallBase *CS : ToUpdate) {
    bool ShouldDecrementCount = CS->getFunction() == F;

    // Find the best matching specialisation.
    const Spec *BestSpec = nullptr;
    for (const Spec &S : make_range(Begin, End)) {
      if (!S.Clone || (BestSpec && S.Gain <= BestSpec->Gain))
        continue;

      if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
            unsigned ArgNo = Arg.Formal->getArgNo();
            return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;
          }))
        continue;

      BestSpec = &S;
    }

    if (BestSpec) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS
                        << " to call " << BestSpec->Clone->getName() << "\n");
      CS->setCalledFunction(BestSpec->Clone);
      ShouldDecrementCount = true;
    }

    if (ShouldDecrementCount)
      --NCallsLeft;
  }

  // If the function has been completely specialized, the original function
  // is no longer needed. Mark it unreachable.
  if (NCallsLeft == 0) {
    Solver.markFunctionUnreachable(F);
    FullySpecialized.insert(F);
  }
}