65#define DEBUG_TYPE "function-specialization"
67STATISTIC(NumSpecsCreated,
"Number of specializations created");
71 "Force function specialization for every call site with a constant "
76 "The maximum number of clones allowed for a single function "
81 "Don't specialize functions that have less than this number of "
86 "Average loop iteration count"));
90 "Enable function specialization on the address of global values"));
98 "Enable specialization of functions that take a literal constant as an "
103 Value *StoreValue =
nullptr;
109 if (
auto *Bitcast = dyn_cast<BitCastInst>(
User)) {
110 if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
115 if (
auto *Store = dyn_cast<StoreInst>(
User)) {
117 if (StoreValue ||
Store->isVolatile())
119 StoreValue =
Store->getValueOperand();
129 return getCandidateConstant(StoreValue);
140 if (
auto *ConstVal = dyn_cast<ConstantInt>(Val))
142 auto *Alloca = dyn_cast<AllocaInst>(Val);
145 return getPromotableAlloca(Alloca, Call);
169void FunctionSpecializer::promoteConstantStackValues() {
177 for (
auto *
User :
F.users()) {
179 auto *
Call = dyn_cast<CallInst>(
User);
186 bool Changed =
false;
187 for (
const Use &U :
Call->args()) {
188 unsigned Idx =
Call->getArgOperandNo(&U);
195 auto *ConstVal = getConstantStackValue(Call, ArgOp);
202 if (ArgOpType != ConstVal->getType())
221 auto *II = dyn_cast<IntrinsicInst>(&Inst);
224 if (II->getIntrinsicID() != Intrinsic::ssa_copy)
226 Inst.replaceAllUsesWith(II->getOperand(0));
227 Inst.eraseFromParent();
233void FunctionSpecializer::cleanUpSSA() {
255 if (NumSpecsCreated > 0)
256 dbgs() <<
"FnSpecialization: Created " << NumSpecsCreated
257 <<
" specializations in module " << M.getName() <<
"\n");
259 removeDeadFunctions();
271 unsigned NumCandidates = 0;
273 if (!isCandidateFunction(&
F))
276 auto Cost = getSpecializationCost(&
F);
277 if (!
Cost.isValid()) {
278 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Invalid specialization cost for "
279 <<
F.getName() <<
"\n");
284 <<
F.getName() <<
" is " <<
Cost <<
"\n");
286 if (!findSpecializations(&
F,
Cost, AllSpecs, SM)) {
288 dbgs() <<
"FnSpecialization: No possible specializations found for "
289 <<
F.getName() <<
"\n");
296 if (!NumCandidates) {
299 <<
"FnSpecialization: No possible specializations found in module\n");
306 auto CompareGain = [&AllSpecs](
unsigned I,
unsigned J) {
307 return AllSpecs[
I].Gain > AllSpecs[J].Gain;
309 const unsigned NSpecs =
310 std::min(NumCandidates *
MaxClones,
unsigned(AllSpecs.
size()));
312 std::iota(BestSpecs.
begin(), BestSpecs.
begin() + NSpecs, 0);
313 if (AllSpecs.
size() > NSpecs) {
314 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Number of candidates exceed "
315 <<
"the maximum number of clones threshold.\n"
316 <<
"FnSpecialization: Specializing the "
318 <<
" most profitable candidates.\n");
319 std::make_heap(BestSpecs.
begin(), BestSpecs.
begin() + NSpecs, CompareGain);
320 for (
unsigned I = NSpecs,
N = AllSpecs.
size();
I <
N; ++
I) {
321 BestSpecs[NSpecs] =
I;
322 std::push_heap(BestSpecs.
begin(), BestSpecs.
end(), CompareGain);
323 std::pop_heap(BestSpecs.
begin(), BestSpecs.
end(), CompareGain);
327 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: List of specializations \n";
328 for (
unsigned I = 0;
I < NSpecs; ++
I) {
329 const Spec &S = AllSpecs[BestSpecs[
I]];
330 dbgs() <<
"FnSpecialization: Function " << S.
F->
getName()
331 <<
" , gain " << S.
Gain <<
"\n";
333 dbgs() <<
"FnSpecialization: FormalArg = "
334 <<
Arg.Formal->getNameOrAsOperand()
335 <<
", ActualArg = " <<
Arg.Actual->getNameOrAsOperand()
342 for (
unsigned I = 0;
I < NSpecs; ++
I) {
343 Spec &S = AllSpecs[BestSpecs[
I]];
344 S.
Clone = createSpecialization(S.
F, S.
Sig);
350 Call->setCalledFunction(S.
Clone);
354 OriginalFuncs.insert(S.
F);
363 auto [Begin, End] = SM[
F];
364 updateCallSites(
F, AllSpecs.
begin() + Begin, AllSpecs.
begin() + End);
367 promoteConstantStackValues();
371void FunctionSpecializer::removeDeadFunctions() {
374 <<
F->getName() <<
"\n");
377 F->eraseFromParent();
379 FullySpecialized.clear();
391 Metrics.analyzeBasicBlock(&BB, (GetTTI)(*
F), EphValues);
394 <<
F->getName() <<
" is " <<
Metrics.NumInsts
395 <<
" instructions\n");
420 if (isArgumentInteresting(&
Arg))
427 for (
User *U :
F->users()) {
428 if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
430 auto &CS = *cast<CallBase>(U);
433 if (CS.getCalledFunction() !=
F)
438 if (CS.hasFnAttr(Attribute::MinSize))
450 Constant *
C = getCandidateConstant(CS.getArgOperand(
A->getArgNo()));
453 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Found interesting argument "
454 <<
A->getName() <<
" : " <<
C->getNameOrAsOperand()
463 if (
auto It = UM.
find(S); It != UM.
end()) {
470 if (CS.getFunction() ==
F)
472 const unsigned Index = It->second;
479 getSpecializationBonus(
A.Formal,
A.Actual, Solver.
getLoopInfo(*
F));
487 if (CS.getFunction() !=
F)
489 const unsigned Index = AllSpecs.
size() - 1;
492 It->second.second =
Index + 1;
500bool FunctionSpecializer::isCandidateFunction(
Function *
F) {
501 if (
F->isDeclaration())
504 if (
F->hasFnAttribute(Attribute::NoDuplicate))
511 if (Specializations.contains(
F))
515 if (
F->hasOptSize() ||
525 if (
F->hasFnAttribute(Attribute::AlwaysInline))
528 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Try function: " <<
F->getName()
545 Specializations.insert(Clone);
570 auto *
I = dyn_cast_or_null<Instruction>(U);
575 return std::numeric_limits<unsigned>::min();
586 if (
I->mayReadFromMemory() ||
I->isCast())
587 for (
auto *
User :
I->users())
598 auto &
TTI = (GetTTI)(*
F);
599 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Analysing bonus for constant: "
600 <<
C->getNameOrAsOperand() <<
"\n");
603 for (
auto *U :
A->users()) {
612 Function *CalledFunction = dyn_cast<Function>(
C->stripPointerCasts());
617 auto &CalleeTTI = (GetTTI)(*CalledFunction);
625 for (
User *U :
A->users()) {
626 if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
628 auto *CS = cast<CallBase>(U);
629 if (CS->getCalledOperand() !=
A)
646 getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);
651 Bonus += Params.DefaultThreshold;
656 <<
" for user " << *U <<
"\n");
659 return TotalCost + Bonus;
664bool FunctionSpecializer::isArgumentInteresting(
Argument *
A) {
671 Type *ArgTy =
A->getType();
683 if (
A->hasByValAttr() && !
A->getParent()->onlyReadsMemory())
692 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Nothing to do, parameter "
693 <<
A->getNameOrAsOperand() <<
" is already constant\n");
697 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Found interesting parameter "
698 <<
A->getNameOrAsOperand() <<
"\n");
705Constant *FunctionSpecializer::getCandidateConstant(
Value *V) {
706 if (isa<PoisonValue>(V))
710 if (
auto *GV = dyn_cast<GlobalVariable>(V)) {
716 if (!GV->getValueType()->isSingleValueType())
728 assert(
V->getType()->isIntegerTy() &&
"Non-integral constant range");
738void FunctionSpecializer::updateCallSites(
Function *
F,
const Spec *Begin,
742 for (
User *U :
F->users())
743 if (
auto *CS = dyn_cast<CallBase>(U);
744 CS && CS->getCalledFunction() ==
F &&
748 unsigned NCallsLeft = ToUpdate.
size();
750 bool ShouldDecrementCount = CS->getFunction() ==
F;
753 const Spec *BestSpec =
nullptr;
755 if (!S.Clone || (BestSpec && S.Gain <= BestSpec->
Gain))
759 unsigned ArgNo = Arg.Formal->getArgNo();
760 return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;
770 CS->setCalledFunction(BestSpec->
Clone);
771 ShouldDecrementCount =
true;
774 if (ShouldDecrementCount)
780 if (NCallsLeft == 0) {
782 FullySpecialized.insert(
F);
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > ForceSpecialization("force-specialization", cl::init(false), cl::Hidden, cl::desc("Force function specialization for every call site with a constant " "argument"))
static void removeSSACopy(Function &F)
static cl::opt< unsigned > MaxClones("funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc("The maximum number of clones allowed for a single function " "specialization"))
static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, const LoopInfo &LI)
static cl::opt< unsigned > AvgLoopIters("funcspec-avg-loop-iters", cl::init(10), cl::Hidden, cl::desc("Average loop iteration count"))
static Function * cloneCandidateFunction(Function *F)
Clone the function F and remove the ssa_copy intrinsics added by the SCCPSolver in the cloned version...
static cl::opt< unsigned > MinFunctionSize("funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc("Don't specialize functions that have less than this number of " "instructions"))
static cl::opt< bool > SpecializeOnAddress("funcspec-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values"))
static cl::opt< bool > SpecializeLiteralConstant("funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant as an " "argument"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
This class represents an incoming formal argument to a Function.
LLVM Basic Block Representation.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
const APInt * getSingleElement() const
If this set contains a single element, return it, otherwise return null.
bool isSingleElement() const
Return true if this set contains exactly one member.
This is an important base class in LLVM.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool run()
Attempt to specialize functions in the module to enable constant propagation across function boundari...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
const BasicBlock & front() const
@ InternalLinkage
Rename collisions when linking (static functions).
Represents the cost of inlining a function.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
static InstructionCost getInvalid(CostType Val=0)
void print(raw_ostream &OS) const
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
void visitCall(CallInst &I)
void solveWhileResolvedUndefsIn(Module &M)
const LoopInfo & getLoopInfo(Function &F)
void addArgumentTrackedFunction(Function *F)
const ValueLatticeElement & getLatticeValueFor(Value *V) const
bool isBlockExecutable(BasicBlock *BB) const
bool markBlockExecutable(BasicBlock *BB)
markBlockExecutable - This method can be used by clients to mark all of the blocks that are known to ...
void markArgInFuncSpecialization(Function *F, const SmallVectorImpl< ArgInfo > &Args)
Mark the constant arguments of a new function specialization.
void markFunctionUnreachable(Function *F)
Mark all of the blocks in function F non-executable.
bool isArgumentTrackedFunction(Function *F)
Returns true if the given function is in the solver's set of argument-tracked functions.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
This class represents lattice values for constants.
const ConstantRange & getConstantRange(bool UndefAllowed=true) const
Returns the constant range for this value.
bool isConstantRange(bool UndefAllowed=true) const
Returns true if this value is a constant range.
bool isUnknownOrUndef() const
Constant * getConstant() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
StringRef getName() const
Return a constant reference to the value's name.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
const int IndirectCallThreshold
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
hash_code hash_value(const FixedPointSemantics &Val)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Helper struct shared between Function Specialization and SCCP Solver.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
static unsigned getHashValue(const SpecSig &S)
static bool isEqual(const SpecSig &LHS, const SpecSig &RHS)
static SpecSig getEmptyKey()
static SpecSig getTombstoneKey()
An information struct used to provide DenseMap with the various necessary components for a given valu...
SmallVector< ArgInfo, 4 > Args
SmallVector< CallBase * > CallSites