Go to the documentation of this file.
64 #define DEBUG_TYPE "function-specialization"
66 STATISTIC(NumFuncSpecialized,
"Number of functions specialized");
70 cl::desc(
"Force function specialization for every call site with a "
71 "constant argument"));
75 cl::desc(
"The maximum number of iterations function specialization is run"),
80 cl::desc(
"The maximum number of clones allowed for a single function "
85 "func-specialization-size-threshold",
cl::Hidden,
86 cl::desc(
"Don't specialize functions that have less than this theshold "
87 "number of instructions"),
92 cl::desc(
"Average loop iteration count cost"),
97 cl::desc(
"Enable function specialization on the address of global values"));
108 cl::desc(
"Enable specialization of functions that take a literal constant "
114 struct SpecializationInfo {
142 Value *StoreValue =
nullptr;
154 if (
auto *
Store = dyn_cast<StoreInst>(
User)) {
156 if (StoreValue ||
Store->isVolatile())
158 StoreValue =
Store->getValueOperand();
164 return dyn_cast_or_null<Constant>(StoreValue);
175 if (
auto *ConstVal = dyn_cast<ConstantInt>(Val))
177 auto *Alloca = dyn_cast<AllocaInst>(Val);
178 if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
209 for (
auto *
F : WorkList) {
211 for (
auto *
User :
F->users()) {
213 auto *Call = dyn_cast<CallInst>(
User);
217 bool Changed =
false;
218 for (
const Use &U : Call->args()) {
219 unsigned Idx = Call->getArgOperandNo(&U);
220 Value *ArgOp = Call->getArgOperand(Idx);
223 if (!Call->onlyReadsMemory(Idx) || !ArgOpType->
isPointerTy())
233 if (ArgOpType != ConstVal->getType())
236 Call->setArgOperand(Idx, GV);
252 auto *II = dyn_cast<IntrinsicInst>(&Inst);
255 if (II->getIntrinsicID() != Intrinsic::ssa_copy)
257 Inst.replaceAllUsesWith(II->getOperand(0));
258 Inst.eraseFromParent();
269 class FunctionSpecializer {
289 : Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {}
291 ~FunctionSpecializer() {
293 removeDeadInstructions();
294 removeDeadFunctions();
302 bool Changed =
false;
303 for (
auto *
F : Candidates) {
304 if (!isCandidateFunction(
F))
307 auto Cost = getSpecializationCost(
F);
308 if (!Cost.isValid()) {
310 dbgs() <<
"FnSpecialization: Invalid specialization cost.\n");
315 <<
F->getName() <<
" is " << Cost <<
"\n");
318 if (!calculateGains(
F, Cost, Specializations)) {
319 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: No possible constants found\n");
324 for (
auto &Entry : Specializations)
325 specializeFunction(
F, Entry.second, WorkList);
328 updateSpecializedFuncs(Candidates, WorkList);
329 NumFuncSpecialized += NbFunctionsSpecialized;
333 void removeDeadInstructions() {
334 for (
auto *
I : ReplacedWithConstant) {
335 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Removing dead instruction " << *
I
337 I->eraseFromParent();
339 ReplacedWithConstant.clear();
342 void removeDeadFunctions() {
343 for (
auto *
F : FullySpecialized) {
345 <<
F->getName() <<
"\n");
346 F->eraseFromParent();
348 FullySpecialized.clear();
363 <<
"\nFnSpecialization: with " << *Const <<
"\n");
367 for (
auto *U : V->
users())
368 if (
auto *
I = dyn_cast<Instruction>(U))
370 UseInsts.push_back(
I);
374 for (
auto *
I : UseInsts)
378 if (
auto *
I = dyn_cast<Instruction>(V)) {
379 if (
I->isSafeToRemove()) {
380 ReplacedWithConstant.push_back(
I);
390 unsigned NbFunctionsSpecialized = 0;
401 Metrics.analyzeBasicBlock(&
BB, (GetTTI)(*
F), EphValues);
404 <<
F->getName() <<
" is " <<
Metrics.NumInsts
405 <<
" instructions\n");
434 if (!isArgumentInteresting(&FormalArg, ActualArgs)) {
436 << FormalArg.getNameOrAsOperand()
437 <<
" is not interesting\n");
441 for (
const auto &Entry : ActualArgs) {
445 auto I = Specializations.
insert({
Call, SpecializationInfo()});
446 SpecializationInfo &
S =
I.first->second;
451 S.Gain += getSpecializationBonus(&FormalArg, ActualArg);
452 S.Args.push_back({&FormalArg, ActualArg});
458 [](
const auto &Entry) {
return Entry.second.Gain <= 0; });
465 return L.second.Gain >
R.second.Gain;
470 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Number of candidates exceed "
471 <<
"the maximum number of clones threshold.\n"
472 <<
"FnSpecialization: Truncating worklist to "
477 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Specializations for function "
478 <<
F->getName() <<
"\n";
479 for (
const auto &Entry
481 dbgs() <<
"FnSpecialization: Gain = " << Entry.second.Gain
484 dbgs() <<
"FnSpecialization: FormalArg = "
485 <<
Arg.Formal->getNameOrAsOperand()
487 <<
Arg.Actual->getNameOrAsOperand() <<
"\n";
490 return !WorkList.empty();
495 if (SpecializedFuncs.contains(
F))
499 if (
F->hasOptSize() ||
509 if (
F->hasFnAttribute(Attribute::AlwaysInline))
512 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Try function: " <<
F->getName()
517 void specializeFunction(
Function *
F, SpecializationInfo &
S,
523 rewriteCallSites(Clone,
S.Args,
Mappings);
531 WorkList.push_back(Clone);
532 NbFunctionsSpecialized++;
536 if (
F->getNumUses() == 0 ||
all_of(
F->users(), [
F](
User *U) {
537 if (auto *CS = dyn_cast<CallBase>(U))
538 return CS->getFunction() == F;
542 FullySpecialized.insert(
F);
563 unsigned Penalty = NbFunctionsSpecialized + 1;
569 auto *
I = dyn_cast_or_null<Instruction>(U);
580 if (
I->mayReadFromMemory() ||
I->isCast())
581 for (
auto *
User :
I->users())
582 Cost += getUserBonus(
User,
TTI, LI);
595 auto &
TTI = (GetTTI)(*
F);
596 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Analysing bonus for constant: "
597 <<
C->getNameOrAsOperand() <<
"\n");
600 for (
auto *U :
A->users()) {
601 TotalCost += getUserBonus(U,
TTI, LI);
609 Function *CalledFunction = dyn_cast<Function>(
C->stripPointerCasts());
614 auto &CalleeTTI = (GetTTI)(*CalledFunction);
622 for (
User *U :
A->users()) {
623 if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
625 auto *CS = cast<CallBase>(U);
626 if (CS->getCalledOperand() != A)
641 getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);
646 Bonus += Params.DefaultThreshold;
651 <<
" for user " << *U <<
"\n");
654 return TotalCost + Bonus;
668 bool isArgumentInteresting(
Argument *A,
672 if (!
A->getType()->isSingleValueType() ||
A->user_empty())
679 <<
A->getNameOrAsOperand()
680 <<
" is already constant?\n");
702 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Found interesting argument "
703 <<
A->getNameOrAsOperand() <<
"\n");
709 void getPossibleConstants(
Argument *A,
715 if (
A->hasByValAttr() && !
F->onlyReadsMemory())
719 for (
User *U :
F->users()) {
720 if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
722 auto &CS = *cast<CallBase>(U);
725 if (CS.hasFnAttr(Attribute::MinSize))
733 auto *V = CS.getArgOperand(
A->getArgNo());
734 if (isa<PoisonValue>(V))
738 if (
auto *GV = dyn_cast<GlobalVariable>(V)) {
741 if (!GV->isConstant())
745 if (!GV->getValueType()->isSingleValueType())
751 Constants.push_back({&CS, cast<Constant>(V)});
765 assert(!
Args.empty() &&
"Specialization without arguments");
769 for (
auto *U :
F->users()) {
770 if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
772 auto &CS = *cast<CallBase>(U);
773 if (!CS.getCalledFunction() || CS.getCalledFunction() !=
F)
775 CallSitesToRewrite.push_back(&CS);
779 <<
F->getName() <<
" with " << Clone->
getName() <<
"\n");
781 for (
auto *CS : CallSitesToRewrite) {
783 << CS->getFunction()->getName() <<
" ->" << *CS
786 (CS->getFunction() == Clone &&
789 unsigned ArgNo =
Arg.Formal->getArgNo();
790 return CS->getArgOperand(ArgNo) ==
Mappings[
Arg.Formal];
794 unsigned ArgNo =
Arg.Formal->getArgNo();
795 return CS->getArgOperand(ArgNo) ==
Arg.Actual;
797 CS->setCalledFunction(Clone);
804 for (
auto *
F : WorkList) {
805 SpecializedFuncs.insert(
F);
809 if (
F->hasExactDefinition() && !
F->hasFnAttribute(Attribute::Naked))
813 Candidates.push_back(
F);
819 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Replaced constant argument: "
820 <<
Arg.getNameOrAsOperand() <<
"\n");
833 FunctionSpecializer
FS(Solver, GetAC, GetTTI, GetTLI);
834 bool Changed =
false;
839 if (
F.isDeclaration())
841 if (
F.hasFnAttribute(Attribute::NoDuplicate))
844 LLVM_DEBUG(
dbgs() <<
"\nFnSpecialization: Analysing decl: " <<
F.getName()
856 <<
"FnSpecialization: Doesn't have local linkage, or "
857 <<
"has its address taken\n");
872 G.removeDeadConstantUsers();
883 if (TrackedFuncs.empty()) {
889 auto RunSCCPSolver = [&](
auto &WorkList) {
890 bool ResolvedUndefs =
true;
892 while (ResolvedUndefs) {
900 ResolvedUndefs =
false;
903 ResolvedUndefs =
true;
906 for (
auto *
F : WorkList) {
913 Changed |=
FS.tryToReplaceWithConstant(&
I);
920 for (
auto *
F : FuncDecls)
925 RunSCCPSolver(FuncDecls);
930 FS.specializeFunctions(FuncDecls, WorkList)) {
931 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Finished iteration " <<
I <<
"\n");
934 RunSCCPSolver(WorkList);
943 LLVM_DEBUG(
dbgs() <<
"FnSpecialization: Number of specializations = "
944 << NumFuncSpecialized <<
"\n");
This class represents an incoming formal argument to a Function.
std::pair< CallBase *, Constant * > CallArgBinding
const ConstantRange & getConstantRange(bool UndefAllowed=true) const
Returns the constant range for this value.
This is an optimization pass for GlobalISel generic memory operations.
iterator erase(const_iterator CI)
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
A parsed version of the target data layout string in and methods for querying it.
bool isPointerTy() const
True if this is an instance of PointerType.
Helper struct for bundling up the analysis results per function for IPSCCP.
static cl::opt< bool > SpecializeOnAddresses("func-specialization-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values"))
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
static bool isConstant(const ValueLatticeElement &LV)
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Constant * getConstant(const ValueLatticeElement &LV) const
Helper to return a Constant if LV is either a constant or a constant range with a single element.
The instances of the Type class are immutable: once they are created, they are never changed.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool markBlockExecutable(BasicBlock *BB)
markBlockExecutable - This method can be used by clients to mark all of the blocks that are known to ...
void visit(Instruction *I)
void print(raw_ostream &OS) const
void addArgumentTrackedFunction(Function *F)
static cl::opt< unsigned > AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden, cl::desc("Average loop iteration count cost"), cl::init(10))
void markFunctionUnreachable(Function *F)
Mark all of the blocks in function F non-executable.
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
bool isUnknownOrUndef() const
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
void markOverdefined(Value *V)
markOverdefined - Mark the specified value overdefined.
(vector float) vec_cmpeq(*A, *B) C
bool isSingleElement() const
Return true if this set contains exactly one member.
Represents the cost of inlining a function.
void addAnalysis(Function &F, AnalysisResultsForFn A)
static cl::opt< unsigned > SmallFunctionThreshold("func-specialization-size-threshold", cl::Hidden, cl::desc("Don't specialize functions that have less than this theshold " "number of instructions"), cl::init(100))
std::pair< CallBase *, SpecializationInfo > CallSpecBinding
STATISTIC(NumFunctions, "Total number of functions")
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V)
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
@ InternalLinkage
Rename collisions when linking (static functions).
bool canTrackGlobalVariableInterprocedurally(GlobalVariable *GV)
Determine if the value maintained in the given global variable can be tracked interprocedurally.
An efficient, type-erasing, non-owning reference to a callable.
A MapVector that performs no allocations if smaller than a certain size.
void visitCall(CallInst &I)
static cl::opt< unsigned > FuncSpecializationMaxIters("func-specialization-max-iters", cl::Hidden, cl::desc("The maximum number of iterations function specialization is run"), cl::init(1))
const ValueLatticeElement & getLatticeValueFor(Value *V) const
This is an important base class in LLVM.
const int IndirectCallThreshold
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
SCCPSolver - This interface class is a general purpose solver for Sparse Conditional Constant Propaga...
int getCostDelta() const
Get the cost delta from the threshold for inlining.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
VectorType takeVector()
Clear the MapVector and return the underlying vector.
static cl::opt< unsigned > MaxClonesThreshold("func-specialization-max-clones", cl::Hidden, cl::desc("The maximum number of clones allowed for a single function " "specialization"), cl::init(3))
print Print MemDeps of function
A Module instance is used to store all the information related to an LLVM module.
bool runFunctionSpecialization(Module &M, const DataLayout &DL, std::function< TargetLibraryInfo &(Function &)> GetTLI, std::function< TargetTransformInfo &(Function &)> GetTTI, std::function< AssumptionCache &(Function &)> GetAC, function_ref< AnalysisResultsForFn(Function &)> GetAnalysis)
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
void markArgInFuncSpecialization(Function *F, const SmallVectorImpl< ArgInfo > &Args)
Mark the constant arguments of a new function specialization.
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
A cache of @llvm.assume calls within a function.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
static Constant * getConstantStackValue(CallInst *Call, Value *Val, SCCPSolver &Solver)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
void removeLatticeValueFor(Value *V)
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
This class represents lattice values for constants.
static void constantArgPropagation(FuncList &WorkList, Module &M, SCCPSolver &Solver)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
StringRef getName() const
Return a constant reference to the value's name.
this could be done in SelectionDAGISel along with other special for
Helper struct shared between Function Specialization and SCCP Solver.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool isOverdefined() const
bool isBlockExecutable(BasicBlock *BB) const
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
void stable_sort(R &&Range)
Provides information about what library functions are available for the current target.
bool resolvedUndefsIn(Function &F)
resolvedUndefsIn - While solving the dataflow for a function, we assume that branches on undef values...
static Constant * getPromotableAlloca(AllocaInst *Alloca, CallInst *Call)
static void removeSSACopy(Function &F)
bool canTrackArgumentsInterprocedurally(Function *F)
Determine if the values of the given function's arguments can be tracked interprocedurally.
bool isConstantRange(bool UndefAllowed=true) const
Returns true if this value is a constant range.
static const uint32_t IV[8]
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void trackValueOfGlobalVariable(GlobalVariable *GV)
trackValueOfGlobalVariable - Clients can use this method to inform the SCCPSolver that it should trac...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static cl::opt< bool > EnableSpecializationForLiteralConstant("function-specialization-for-literal-constant", cl::init(false), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant " "as an argument."))
This class represents a function call, abstracting a target machine's calling convention.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
void solve()
Solve - Solve for constants and executable blocks.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
an instruction to allocate memory on the stack
static cl::opt< bool > ForceFunctionSpecialization("force-function-specialization", cl::init(false), cl::Hidden, cl::desc("Force function specialization for every call site with a " "constant argument"))
void addTrackedFunction(Function *F)
addTrackedFunction - If the SCCP solver is supposed to track calls into and out of the specified func...
LLVM Value Representation.
iterator_range< user_iterator > users()
SmallPtrSetImpl< Function * > & getArgumentTrackedFunctions()
Return a reference to the set of argument tracked functions.
static bool isOverdefined(const ValueLatticeElement &LV)
A Use represents the edge between a Value definition and its users.