56#define DEBUG_TYPE "memprof-context-disambiguation"
59 "Number of function clones created during whole program analysis");
61 "Number of function clones created during ThinLTO backend");
63 "Number of functions that had clones created during ThinLTO backend");
65 FunctionCloneDuplicatesThinBackend,
66 "Number of function clone duplicates detected during ThinLTO backend");
67STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
68 "cloned) during whole program analysis");
69STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
70 "during whole program analysis");
72 "Number of not cold static allocations (possibly cloned) during "
74STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
75 "(possibly cloned) during ThinLTO backend");
77 "Number of original (not cloned) allocations with memprof profiles "
78 "during ThinLTO backend");
80 AllocVersionsThinBackend,
81 "Number of allocation versions (including clones) during ThinLTO backend");
83 "Maximum number of allocation versions created for an original "
84 "allocation during ThinLTO backend");
86 "Number of unclonable ambigous allocations during ThinLTO backend");
88 "Number of edges removed due to mismatched callees (profiled vs IR)");
90 "Number of profiled callees found via tail calls");
92 "Aggregate depth of profiled callees found via tail calls");
94 "Maximum depth of profiled callees found via tail calls");
96 "Number of profiled callees found via multiple tail call chains");
97STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
98STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
99STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
101 "Number of missing alloc nodes for context ids");
103 "Number of calls skipped during cloning due to unexpected operand");
105 "Number of callsites assigned to call multiple non-matching clones");
106STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
107STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
108STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
109STATISTIC(NumImportantContextIds,
"Number of important context ids");
110STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
111STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
112STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
114 "Number of aliasees prevailing in a different module than its alias");
119 cl::desc(
"Specify the path prefix of the MemProf dot files."));
123 cl::desc(
"Export graph to dot files."));
128 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
138 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
143 "Export only nodes with contexts feeding given "
144 "-memprof-dot-alloc-id"),
146 "Export only nodes with given -memprof-dot-context-id")));
150 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
151 "or to highlight if -memprof-dot-scope=all"));
155 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
156 "highlight otherwise"));
160 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
164 cl::desc(
"Perform verification checks on CallingContextGraph."));
168 cl::desc(
"Perform frequent verification checks on nodes."));
171 "memprof-import-summary",
172 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
178 cl::desc(
"Max depth to recursively search for missing "
179 "frames through tail calls."));
184 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
188 cl::desc(
"Allow cloning of contexts through recursive cycles"));
195 cl::desc(
"Merge clones before assigning functions"));
204 cl::desc(
"Allow cloning of contexts having recursive cycles"));
210 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
214 "enable-memprof-context-disambiguation",
cl::Hidden,
215 cl::desc(
"Enable MemProf context disambiguation"));
221 cl::desc(
"Linking with hot/cold operator new interfaces"));
226 "Require target function definition when promoting indirect calls"));
233 cl::desc(
"Number of largest cold contexts to consider important"));
237 cl::desc(
"Enables edge fixup for important contexts"));
259template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
260class CallsiteContextGraph {
262 CallsiteContextGraph() =
default;
263 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
264 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
268 EmitRemark =
nullptr,
269 bool AllowExtraAnalysis =
false);
273 void identifyClones();
280 bool assignFunctions();
286 EmitRemark =
nullptr)
const;
289 const CallsiteContextGraph &CCG) {
295 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
297 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
299 void exportToDot(std::string Label)
const;
302 struct FuncInfo final
303 :
public std::pair<FuncTy *, unsigned > {
304 using Base = std::pair<FuncTy *, unsigned>;
306 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
307 explicit operator bool()
const {
return this->first !=
nullptr; }
308 FuncTy *func()
const {
return this->first; }
309 unsigned cloneNo()
const {
return this->second; }
313 struct CallInfo final :
public std::pair<CallTy, unsigned > {
314 using Base = std::pair<CallTy, unsigned>;
316 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
318 explicit operator bool()
const {
return (
bool)this->first; }
319 CallTy call()
const {
return this->first; }
320 unsigned cloneNo()
const {
return this->second; }
321 void setCloneNo(
unsigned N) { this->second =
N; }
323 if (!
operator bool()) {
329 OS <<
"\t(clone " << cloneNo() <<
")";
355 bool Recursive =
false;
382 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
386 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
390 bool useCallerEdgesForContextInfo()
const {
395 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
413 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
414 Count += Edge->getContextIds().size();
418 CalleeEdges, useCallerEdgesForContextInfo()
420 : std::vector<std::shared_ptr<ContextEdge>>());
421 for (
const auto &Edge : Edges)
428 uint8_t computeAllocType()
const {
433 CalleeEdges, useCallerEdgesForContextInfo()
435 : std::vector<std::shared_ptr<ContextEdge>>());
436 for (
const auto &Edge : Edges) {
447 bool emptyContextIds()
const {
449 CalleeEdges, useCallerEdgesForContextInfo()
451 : std::vector<std::shared_ptr<ContextEdge>>());
452 for (
const auto &Edge : Edges) {
453 if (!Edge->getContextIds().empty())
460 std::vector<ContextNode *> Clones;
463 ContextNode *CloneOf =
nullptr;
465 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
467 ContextNode(
bool IsAllocation, CallInfo
C)
468 : IsAllocation(IsAllocation),
Call(
C) {}
470 void addClone(ContextNode *Clone) {
472 CloneOf->Clones.push_back(Clone);
473 Clone->CloneOf = CloneOf;
475 Clones.push_back(Clone);
477 Clone->CloneOf =
this;
481 ContextNode *getOrigNode() {
488 unsigned int ContextId);
490 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
491 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
492 void eraseCalleeEdge(
const ContextEdge *Edge);
493 void eraseCallerEdge(
const ContextEdge *Edge);
495 void setCall(CallInfo
C) {
Call = std::move(
C); }
497 bool hasCall()
const {
return (
bool)
Call.call(); }
503 bool isRemoved()
const {
539 bool IsBackedge =
false;
546 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
547 ContextIds(std::move(ContextIds)) {}
553 inline void clear() {
563 inline bool isRemoved()
const {
564 if (Callee || Caller)
585 void removeNoneTypeCalleeEdges(ContextNode *
Node);
586 void removeNoneTypeCallerEdges(ContextNode *
Node);
588 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
594 template <
class NodeT,
class IteratorT>
595 std::vector<uint64_t>
600 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
603 template <
class NodeT,
class IteratorT>
604 void addStackNodesForMIB(
608 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
613 void updateStackNodes();
622 void fixupImportantContexts();
626 void handleCallsitesWithMultipleTargets();
629 void markBackedges();
639 bool partitionCallsByCallee(
641 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
648 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
655 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
660 struct CallContextInfo {
664 std::vector<uint64_t> StackIds;
678 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
679 bool CalleeIter =
true);
687 void assignStackNodesPostOrder(
701 void propagateDuplicateContextIds(
707 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
715 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
725 calleesMatch(CallTy
Call, EdgeIter &EI,
730 const FuncTy *getCalleeFunc(CallTy
Call) {
731 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
737 bool calleeMatchesFunc(
738 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
739 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
740 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
741 Call, Func, CallerFunc, FoundCalleeChain);
745 bool sameCallee(CallTy Call1, CallTy Call2) {
746 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
751 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
752 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
758 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
764 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
769 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
774 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
775 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
781 FuncInfo cloneFunctionForCallsite(
783 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
784 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
785 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
790 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
791 unsigned CloneNo)
const {
792 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
796 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
797 CallInfo
C = CallInfo()) {
798 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
799 auto *NewNode = NodeOwner.back().get();
801 NodeToCallingFunc[NewNode] =
F;
802 NewNode->NodeId = NodeOwner.size();
807 ContextNode *getNodeForInst(
const CallInfo &
C);
808 ContextNode *getNodeForAlloc(
const CallInfo &
C);
809 ContextNode *getNodeForStackId(
uint64_t StackId);
831 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
838 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
839 ContextNode *NewCallee,
840 bool NewClone =
false,
848 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
849 ContextNode *NewCaller);
860 void mergeNodeCalleeClones(
865 void findOtherCallersToShareMerge(
866 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
894 struct ImportantContextInfo {
896 std::vector<uint64_t> StackIds;
899 unsigned MaxLength = 0;
903 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
912 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
926 auto Size = StackIds.size();
927 for (
auto Id : Ids) {
928 auto &Entry = ImportantContextIdInfo[Id];
929 Entry.StackIdsToNode[StackIds] =
Node;
931 if (
Size > Entry.MaxLength)
932 Entry.MaxLength =
Size;
941 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
947 unsigned int LastContextId = 0;
950template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
952 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
953template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
955 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
956template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
958 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
959template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
961 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
964class ModuleCallsiteContextGraph
965 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
968 ModuleCallsiteContextGraph(
970 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
973 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
976 uint64_t getStackId(uint64_t IdOrIndex)
const;
978 bool calleeMatchesFunc(
979 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
980 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
981 bool sameCallee(Instruction *Call1, Instruction *Call2);
982 bool findProfiledCalleeThroughTailCalls(
983 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
984 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
985 bool &FoundMultipleCalleeChains);
986 uint64_t getLastStackId(Instruction *
Call);
987 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
990 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
991 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
993 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
994 DenseMap<CallInfo, CallInfo> &CallMap,
995 std::vector<CallInfo> &CallsWithMetadataInFunc,
997 std::string getLabel(
const Function *Func,
const Instruction *
Call,
998 unsigned CloneNo)
const;
1001 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1007struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1008 IndexCall() : PointerUnion() {}
1009 IndexCall(std::nullptr_t) : IndexCall() {}
1010 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1011 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1012 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1014 IndexCall *operator->() {
return this; }
1016 void print(raw_ostream &OS)
const {
1017 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1042class IndexCallsiteContextGraph
1043 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1046 IndexCallsiteContextGraph(
1047 ModuleSummaryIndex &Index,
1051 ~IndexCallsiteContextGraph() {
1056 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1058 for (
auto &Callsite :
I.second)
1059 FS->addCallsite(std::move(*Callsite.second));
1064 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1067 uint64_t getStackId(uint64_t IdOrIndex)
const;
1068 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1069 bool calleeMatchesFunc(
1070 IndexCall &
Call,
const FunctionSummary *Func,
1071 const FunctionSummary *CallerFunc,
1072 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1073 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1074 bool findProfiledCalleeThroughTailCalls(
1075 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1076 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1077 bool &FoundMultipleCalleeChains);
1078 uint64_t getLastStackId(IndexCall &
Call);
1079 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1082 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1083 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1084 IndexCall>::FuncInfo
1085 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1086 DenseMap<CallInfo, CallInfo> &CallMap,
1087 std::vector<CallInfo> &CallsWithMetadataInFunc,
1089 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1090 unsigned CloneNo)
const;
1091 DenseSet<GlobalValue::GUID> findAliaseeGUIDsPrevailingInDifferentModule();
1095 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1097 const ModuleSummaryIndex &Index;
1105 DenseMap<FunctionSummary *,
1106 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1107 FunctionCalleesToSynthesizedCallsiteInfos;
1118 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1121 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1142template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1143bool allocTypesMatch(
1144 const std::vector<uint8_t> &InAllocTypes,
1145 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1149 assert(InAllocTypes.size() == Edges.size());
1151 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1153 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1157 if (l == (uint8_t)AllocationType::None ||
1158 r->AllocTypes == (uint8_t)AllocationType::None)
1160 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1169template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1170bool allocTypesMatchClone(
1171 const std::vector<uint8_t> &InAllocTypes,
1172 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1173 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1177 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1181 for (
const auto &
E : Clone->CalleeEdges) {
1183 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1187 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1188 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1190 if (Iter == EdgeCalleeMap.
end())
1198 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1206template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1207typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1208CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1209 const CallInfo &
C) {
1210 ContextNode *
Node = getNodeForAlloc(
C);
1214 return NonAllocationCallToContextNodeMap.lookup(
C);
1217template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1218typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1219CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1220 const CallInfo &
C) {
1221 return AllocationCallToContextNodeMap.lookup(
C);
1224template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1225typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1226CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1228 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1229 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1230 return StackEntryNode->second;
1234template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1235void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1237 unsigned int ContextId) {
1238 for (
auto &
Edge : CallerEdges) {
1239 if (
Edge->Caller == Caller) {
1241 Edge->getContextIds().insert(ContextId);
1245 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1246 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1247 CallerEdges.push_back(
Edge);
1251template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1252void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1253 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1269 auto CalleeCallerCount =
Callee->CallerEdges.size();
1270 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1275 }
else if (CalleeIter) {
1277 *EI =
Caller->CalleeEdges.erase(*EI);
1280 *EI =
Callee->CallerEdges.erase(*EI);
1282 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1283 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1286template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1287void CallsiteContextGraph<
1288 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1289 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1291 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1293 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1299template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1300void CallsiteContextGraph<
1301 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1302 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1304 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1306 Edge->Caller->eraseCalleeEdge(
Edge.get());
1307 EI =
Node->CallerEdges.erase(EI);
1313template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1314typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1315CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1316 findEdgeFromCallee(
const ContextNode *Callee) {
1317 for (
const auto &
Edge : CalleeEdges)
1318 if (
Edge->Callee == Callee)
1323template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1324typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1325CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1326 findEdgeFromCaller(
const ContextNode *Caller) {
1327 for (
const auto &
Edge : CallerEdges)
1328 if (
Edge->Caller == Caller)
1333template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1334void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1335 eraseCalleeEdge(
const ContextEdge *
Edge) {
1337 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1338 return CalleeEdge.get() ==
Edge;
1340 assert(EI != CalleeEdges.end());
1341 CalleeEdges.erase(EI);
1344template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1345void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1346 eraseCallerEdge(
const ContextEdge *
Edge) {
1348 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1349 return CallerEdge.get() ==
Edge;
1351 assert(EI != CallerEdges.end());
1352 CallerEdges.erase(EI);
1355template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1356uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1357 DenseSet<uint32_t> &ContextIds)
const {
1359 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1360 uint8_t
AllocType = (uint8_t)AllocationType::None;
1361 for (
auto Id : ContextIds) {
1362 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1370template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1372CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1373 const DenseSet<uint32_t> &Node1Ids,
1374 const DenseSet<uint32_t> &Node2Ids)
const {
1376 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1377 uint8_t
AllocType = (uint8_t)AllocationType::None;
1378 for (
auto Id : Node1Ids) {
1379 if (!Node2Ids.
count(Id))
1381 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1389template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1390uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1391 const DenseSet<uint32_t> &Node1Ids,
1392 const DenseSet<uint32_t> &Node2Ids)
const {
1393 if (Node1Ids.
size() < Node2Ids.
size())
1394 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1396 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1399template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1400typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1401CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1402 CallInfo
Call,
const FuncTy *
F) {
1404 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1405 AllocationCallToContextNodeMap[
Call] = AllocNode;
1407 AllocNode->OrigStackOrAllocId = LastContextId;
1410 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1426template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1427template <
class NodeT,
class IteratorT>
1428void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1429 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1432 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1438 ContextIdToAllocationType[++LastContextId] =
AllocType;
1440 bool IsImportant =
false;
1441 if (!ContextSizeInfo.
empty()) {
1442 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1446 uint64_t TotalCold = 0;
1447 for (
auto &CSI : ContextSizeInfo)
1448 TotalCold += CSI.TotalSize;
1454 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1457 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1458 TotalSizeToContextIdTopNCold.erase(
1459 TotalSizeToContextIdTopNCold.begin());
1460 assert(ImportantContextIdInfo.count(IdToRemove));
1461 ImportantContextIdInfo.erase(IdToRemove);
1463 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1467 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1471 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1476 ContextNode *PrevNode = AllocNode;
1480 SmallSet<uint64_t, 8> StackIdSet;
1483 ContextIter != StackContext.
end(); ++ContextIter) {
1484 auto StackId = getStackId(*ContextIter);
1486 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1487 ContextNode *StackNode = getNodeForStackId(StackId);
1489 StackNode = createNewNode(
false);
1490 StackEntryIdToContextNodeMap[StackId] = StackNode;
1491 StackNode->OrigStackOrAllocId = StackId;
1496 auto Ins = StackIdSet.
insert(StackId);
1498 StackNode->Recursive =
true;
1500 StackNode->AllocTypes |= (uint8_t)
AllocType;
1501 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1502 PrevNode = StackNode;
1506template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1508CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1509 const DenseSet<uint32_t> &StackSequenceContextIds,
1510 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1511 DenseSet<uint32_t> NewContextIds;
1512 for (
auto OldId : StackSequenceContextIds) {
1513 NewContextIds.
insert(++LastContextId);
1514 OldToNewContextIds[OldId].insert(LastContextId);
1515 assert(ContextIdToAllocationType.count(OldId));
1517 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1518 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1519 if (CSI != ContextIdToContextSizeInfos.end())
1520 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1521 if (DotAllocContextIds.
contains(OldId))
1522 DotAllocContextIds.
insert(LastContextId);
1524 return NewContextIds;
1527template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1528void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1529 propagateDuplicateContextIds(
1530 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1532 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1533 DenseSet<uint32_t> NewIds;
1534 for (
auto Id : ContextIds)
1535 if (
auto NewId = OldToNewContextIds.find(Id);
1536 NewId != OldToNewContextIds.end())
1542 auto UpdateCallers = [&](ContextNode *
Node,
1543 DenseSet<const ContextEdge *> &Visited,
1544 auto &&UpdateCallers) ->
void {
1545 for (
const auto &
Edge :
Node->CallerEdges) {
1549 ContextNode *NextNode =
Edge->Caller;
1550 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1553 if (!NewIdsToAdd.
empty()) {
1554 Edge->getContextIds().insert_range(NewIdsToAdd);
1555 UpdateCallers(NextNode, Visited, UpdateCallers);
1560 DenseSet<const ContextEdge *> Visited;
1561 for (
auto &Entry : AllocationCallToContextNodeMap) {
1563 UpdateCallers(Node, Visited, UpdateCallers);
1567template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1568void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1569 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1572 DenseSet<uint32_t> RemainingContextIds) {
1574 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1575 DenseSet<uint32_t> RecursiveContextIds;
1576 DenseSet<uint32_t> AllCallerContextIds;
1581 for (
auto &CE : OrigEdges) {
1582 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1583 for (
auto Id :
CE->getContextIds())
1584 if (!AllCallerContextIds.
insert(Id).second)
1585 RecursiveContextIds.
insert(Id);
1589 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1591 DenseSet<uint32_t> NewEdgeContextIds;
1592 DenseSet<uint32_t> NotFoundContextIds;
1596 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1597 NotFoundContextIds);
1600 if (RecursiveContextIds.
empty()) {
1603 RemainingContextIds.
swap(NotFoundContextIds);
1613 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1615 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1618 if (NewEdgeContextIds.
empty()) {
1622 if (TowardsCallee) {
1623 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1624 auto NewEdge = std::make_shared<ContextEdge>(
1625 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1626 NewNode->CalleeEdges.push_back(NewEdge);
1627 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1629 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1630 auto NewEdge = std::make_shared<ContextEdge>(
1631 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1632 NewNode->CallerEdges.push_back(NewEdge);
1633 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1636 if (
Edge->getContextIds().empty()) {
1637 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1644template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1646 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1650 assert(!Edge->ContextIds.empty());
1653template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1655 bool CheckEdges =
true) {
1656 if (
Node->isRemoved())
1660 auto NodeContextIds =
Node->getContextIds();
1664 if (
Node->CallerEdges.size()) {
1666 Node->CallerEdges.front()->ContextIds);
1670 set_union(CallerEdgeContextIds, Edge->ContextIds);
1677 NodeContextIds == CallerEdgeContextIds ||
1680 if (
Node->CalleeEdges.size()) {
1682 Node->CalleeEdges.front()->ContextIds);
1686 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1692 NodeContextIds == CalleeEdgeContextIds);
1701 for (
const auto &
E :
Node->CalleeEdges)
1707template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1708void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1709 assignStackNodesPostOrder(ContextNode *Node,
1710 DenseSet<const ContextNode *> &Visited,
1711 DenseMap<uint64_t, std::vector<CallContextInfo>>
1712 &StackIdToMatchingCalls,
1713 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1714 const DenseSet<uint32_t> &ImportantContextIds) {
1722 auto CallerEdges =
Node->CallerEdges;
1723 for (
auto &
Edge : CallerEdges) {
1725 if (
Edge->isRemoved()) {
1729 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1730 CallToMatchingCall, ImportantContextIds);
1739 if (
Node->IsAllocation ||
1740 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1743 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1747 if (Calls.size() == 1) {
1748 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1749 if (Ids.size() == 1) {
1750 assert(SavedContextIds.empty());
1752 assert(Node == getNodeForStackId(Ids[0]));
1753 if (
Node->Recursive)
1756 NonAllocationCallToContextNodeMap[
Call] =
Node;
1758 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1766 uint64_t LastId =
Node->OrigStackOrAllocId;
1767 ContextNode *LastNode = getNodeForStackId(LastId);
1770 assert(LastNode == Node);
1772 ContextNode *LastNode =
Node;
1777 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1779 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1780 bool CreatedNode =
false;
1781 for (
unsigned I = 0;
I < Calls.size();
1782 I++, PrevIterCreatedNode = CreatedNode) {
1783 CreatedNode =
false;
1784 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1787 if (SavedContextIds.empty()) {
1794 auto MatchingCall = CallToMatchingCall[
Call];
1795 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1799 assert(
I > 0 && !PrevIterCreatedNode);
1802 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1807 assert(LastId == Ids.back());
1816 ContextNode *PrevNode = LastNode;
1820 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1822 ContextNode *CurNode = getNodeForStackId(Id);
1826 assert(!CurNode->Recursive);
1828 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1840 if (SavedContextIds.empty()) {
1849 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1850 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1852 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1854 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1860 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1865 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1870 for (
auto Id : Ids) {
1871 ContextNode *CurNode = getNodeForStackId(Id);
1878 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1885 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1886 if (PrevEdge->getContextIds().empty())
1887 removeEdgeFromGraph(PrevEdge);
1892 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1893 ? (uint8_t)AllocationType::None
1894 : CurNode->computeAllocType();
1898 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1902 for (
auto Id : Ids) {
1903 ContextNode *CurNode = getNodeForStackId(Id);
1912template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1913void CallsiteContextGraph<DerivedCCG, FuncTy,
1914 CallTy>::fixupImportantContexts() {
1915 if (ImportantContextIdInfo.empty())
1919 NumImportantContextIds = ImportantContextIdInfo.size();
1925 exportToDot(
"beforestackfixup");
1950 for (
auto &[CurContextId, Info] : ImportantContextIdInfo) {
1951 if (
Info.StackIdsToNode.empty())
1954 ContextNode *PrevNode =
nullptr;
1955 ContextNode *CurNode =
nullptr;
1956 DenseSet<const ContextEdge *> VisitedEdges;
1957 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1960 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1964 auto LenToEnd = AllStackIds.size() -
I;
1972 auto CheckStackIds = AllStackIds.slice(
I, Len);
1973 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1974 if (EntryIt ==
Info.StackIdsToNode.end())
1976 CurNode = EntryIt->second;
1993 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1996 if (CurEdge->getContextIds().insert(CurContextId).second) {
1997 NumFixupEdgeIdsInserted++;
2002 NumFixupEdgesAdded++;
2003 DenseSet<uint32_t> ContextIds({CurContextId});
2004 auto AllocType = computeAllocType(ContextIds);
2005 auto NewEdge = std::make_shared<ContextEdge>(
2006 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2007 PrevNode->CallerEdges.push_back(NewEdge);
2008 CurNode->CalleeEdges.push_back(NewEdge);
2010 CurEdge = NewEdge.get();
2013 VisitedEdges.
insert(CurEdge);
2016 for (
auto &
Edge : PrevNode->CallerEdges) {
2020 Edge->getContextIds().erase(CurContextId);
2028template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2029void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2037 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2038 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2039 for (
auto &
Call : CallsWithMetadata) {
2041 if (AllocationCallToContextNodeMap.count(
Call))
2043 auto StackIdsWithContextNodes =
2044 getStackIdsWithContextNodesForCall(
Call.call());
2047 if (StackIdsWithContextNodes.empty())
2051 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2052 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2062 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2066 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2067 for (
auto &It : StackIdToMatchingCalls) {
2068 auto &Calls = It.getSecond();
2070 if (Calls.size() == 1) {
2071 auto &Ids = Calls[0].StackIds;
2072 if (Ids.size() == 1)
2085 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2086 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2087 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2090 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2091 return A.StackIds.size() >
B.StackIds.size() ||
2092 (
A.StackIds.size() ==
B.StackIds.size() &&
2093 (
A.StackIds <
B.StackIds ||
2094 (
A.StackIds ==
B.StackIds &&
2095 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2101 uint64_t LastId = It.getFirst();
2102 ContextNode *LastNode = getNodeForStackId(LastId);
2106 if (LastNode->Recursive)
2111 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2119 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2122 for (
unsigned I = 0;
I < Calls.size();
I++) {
2123 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2124 assert(SavedContextIds.empty());
2125 assert(LastId == Ids.back());
2130 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2131 MatchingIdsFuncSet.
clear();
2138 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2140 ContextNode *PrevNode = LastNode;
2141 ContextNode *CurNode = LastNode;
2146 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2148 CurNode = getNodeForStackId(Id);
2152 if (CurNode->Recursive) {
2157 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2178 if (StackSequenceContextIds.
empty()) {
2191 if (Ids.back() != getLastStackId(
Call)) {
2192 for (
const auto &PE : LastNode->CallerEdges) {
2193 set_subtract(StackSequenceContextIds, PE->getContextIds());
2194 if (StackSequenceContextIds.
empty())
2198 if (StackSequenceContextIds.
empty())
2210 MatchingIdsFuncSet.
insert(Func);
2217 bool DuplicateContextIds =
false;
2218 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2219 auto &CallCtxInfo = Calls[J];
2220 auto &NextIds = CallCtxInfo.StackIds;
2223 auto *NextFunc = CallCtxInfo.Func;
2224 if (NextFunc != Func) {
2227 DuplicateContextIds =
true;
2230 auto &NextCall = CallCtxInfo.Call;
2231 CallToMatchingCall[NextCall] =
Call;
2242 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2243 StackSequenceContextIds.
size());
2246 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2247 : StackSequenceContextIds;
2248 assert(!SavedContextIds.empty());
2250 if (!DuplicateContextIds) {
2254 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2255 if (LastNodeContextIds.
empty())
2262 propagateDuplicateContextIds(OldToNewContextIds);
2272 DenseSet<const ContextNode *> Visited;
2274 ImportantContextIdInfo.keys());
2275 for (
auto &Entry : AllocationCallToContextNodeMap)
2276 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2277 CallToMatchingCall, ImportantContextIds);
2279 fixupImportantContexts();
2285uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2286 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2288 return CallsiteContext.
back();
2291uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2293 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2296 return Index.getStackIdAtIndex(CallsiteContext.
back());
2318 auto Pos =
F.getName().find_last_of(
'.');
2321 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2327std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2328 const Instruction *
Call,
2329 unsigned CloneNo)
const {
2335std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2336 const IndexCall &
Call,
2337 unsigned CloneNo)
const {
2338 auto VI = FSToVIMap.find(Func);
2339 assert(VI != FSToVIMap.end());
2342 return CallerName +
" -> alloc";
2345 return CallerName +
" -> " +
2347 Callsite->Clones[CloneNo]);
2351std::vector<uint64_t>
2352ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2353 Instruction *
Call) {
2354 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2356 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2360std::vector<uint64_t>
2361IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2363 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2365 return getStackIdsWithContextNodes<CallsiteInfo,
2366 SmallVector<unsigned>::const_iterator>(
2370template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2371template <
class NodeT,
class IteratorT>
2372std::vector<uint64_t>
2373CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2374 CallStack<NodeT, IteratorT> &CallsiteContext) {
2375 std::vector<uint64_t> StackIds;
2376 for (
auto IdOrIndex : CallsiteContext) {
2377 auto StackId = getStackId(IdOrIndex);
2378 ContextNode *
Node = getNodeForStackId(StackId);
2381 StackIds.push_back(StackId);
2386ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2388 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2389 :
Mod(
M), OREGetter(OREGetter) {
2393 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2395 std::vector<CallInfo> CallsWithMetadata;
2396 for (
auto &BB :
F) {
2397 for (
auto &
I : BB) {
2400 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2401 CallsWithMetadata.push_back(&
I);
2402 auto *AllocNode = addAllocNode(&
I, &
F);
2403 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2407 for (
auto &MDOp : MemProfMD->operands()) {
2409 std::vector<ContextTotalSize> ContextSizeInfo;
2411 if (MIBMD->getNumOperands() > 2) {
2412 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2413 MDNode *ContextSizePair =
2422 ContextSizeInfo.push_back({FullStackId, TotalSize});
2428 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2429 AllocNode, StackContext, CallsiteContext,
2431 TotalSizeToContextIdTopNCold);
2436 DotAllocContextIds = AllocNode->getContextIds();
2440 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2441 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2444 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2445 CallsWithMetadata.push_back(&
I);
2449 if (!CallsWithMetadata.empty())
2450 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2454 dbgs() <<
"CCG before updating call stack chains:\n";
2459 exportToDot(
"prestackupdate");
2464 exportToDot(
"poststackupdate");
2466 handleCallsitesWithMultipleTargets();
2471 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2472 for (
auto &
Call : FuncEntry.second)
2473 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2479IndexCallsiteContextGraph::findAliaseeGUIDsPrevailingInDifferentModule() {
2480 DenseSet<GlobalValue::GUID> AliaseeGUIDs;
2481 for (
auto &
I : Index) {
2483 for (
auto &S :
VI.getSummaryList()) {
2488 auto *AliaseeSummary = &AS->getAliasee();
2496 !isPrevailing(
VI.getGUID(), S.get()))
2501 auto AliaseeGUID = AS->getAliaseeGUID();
2503 if (!isPrevailing(AliaseeGUID, AliaseeSummary))
2504 AliaseeGUIDs.
insert(AliaseeGUID);
2507 AliaseesPrevailingInDiffModuleFromAlias += AliaseeGUIDs.
size();
2508 return AliaseeGUIDs;
2511IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2512 ModuleSummaryIndex &Index,
2522 findAliaseeGUIDsPrevailingInDifferentModule();
2526 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2531 for (
const auto &
I : Index.sortedGlobalValueSummariesRange()) {
2532 auto VI = Index.getValueInfo(
I);
2533 if (GUIDsToSkip.
contains(VI.getGUID()))
2535 for (
auto &S : VI.getSummaryList()) {
2544 !isPrevailing(VI.getGUID(), S.get()))
2549 std::vector<CallInfo> CallsWithMetadata;
2550 if (!
FS->allocs().empty()) {
2551 for (
auto &AN :
FS->mutableAllocs()) {
2556 if (AN.MIBs.empty())
2558 IndexCall AllocCall(&AN);
2559 CallsWithMetadata.push_back(AllocCall);
2560 auto *AllocNode = addAllocNode(AllocCall, FS);
2568 AN.ContextSizeInfos.size() == AN.MIBs.size());
2570 for (
auto &MIB : AN.MIBs) {
2573 std::vector<ContextTotalSize> ContextSizeInfo;
2574 if (!AN.ContextSizeInfos.empty()) {
2575 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2576 ContextSizeInfo.push_back({FullStackId, TotalSize});
2578 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2579 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2580 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2586 DotAllocContextIds = AllocNode->getContextIds();
2592 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2596 if (!
FS->callsites().empty())
2597 for (
auto &SN :
FS->mutableCallsites()) {
2598 IndexCall StackNodeCall(&SN);
2599 CallsWithMetadata.push_back(StackNodeCall);
2602 if (!CallsWithMetadata.empty())
2603 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2605 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2611 dbgs() <<
"CCG before updating call stack chains:\n";
2616 exportToDot(
"prestackupdate");
2621 exportToDot(
"poststackupdate");
2623 handleCallsitesWithMultipleTargets();
2628template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2629void CallsiteContextGraph<DerivedCCG, FuncTy,
2630 CallTy>::handleCallsitesWithMultipleTargets() {
2645 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2646 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2647 auto *
Node = Entry.second;
2656 std::vector<CallInfo> AllCalls;
2657 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2658 AllCalls.push_back(
Node->Call);
2672 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2675 auto It = AllCalls.begin();
2677 for (; It != AllCalls.end(); ++It) {
2680 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2683 if (!Edge->Callee->hasCall())
2685 assert(NodeToCallingFunc.count(Edge->Callee));
2687 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2696 if (
Node->Call != ThisCall) {
2697 Node->setCall(ThisCall);
2708 Node->MatchingCalls.clear();
2711 if (It == AllCalls.end()) {
2712 RemovedEdgesWithMismatchedCallees++;
2716 Node->setCall(CallInfo());
2721 for (++It; It != AllCalls.end(); ++It) {
2725 Node->MatchingCalls.push_back(ThisCall);
2734 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2735 return !it.second->hasCall() || it.second->Call != it.first;
2739 for (
auto &[
Call,
Node] : NewCallToNode)
2740 NonAllocationCallToContextNodeMap[
Call] =
Node;
2744 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2745 NonAllocationCallToContextNodeMap[
Call] =
Node;
2748template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2749bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2751 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2755 struct CallsWithSameCallee {
2756 std::vector<CallInfo> Calls;
2757 ContextNode *
Node =
nullptr;
2763 for (
auto ThisCall : AllCalls) {
2764 auto *
F = getCalleeFunc(
ThisCall.call());
2766 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2775 for (
const auto &Edge :
Node->CalleeEdges) {
2776 if (!Edge->Callee->hasCall())
2778 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2779 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2780 CalleeNodeToCallInfo[Edge->Callee] =
2781 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2787 if (CalleeNodeToCallInfo.
empty())
2799 ContextNode *UnmatchedCalleesNode =
nullptr;
2801 bool UsedOrigNode =
false;
2806 auto CalleeEdges =
Node->CalleeEdges;
2807 for (
auto &Edge : CalleeEdges) {
2808 if (!Edge->Callee->hasCall())
2813 ContextNode *CallerNodeToUse =
nullptr;
2817 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2818 if (!UnmatchedCalleesNode)
2819 UnmatchedCalleesNode =
2820 createNewNode(
false, NodeToCallingFunc[
Node]);
2821 CallerNodeToUse = UnmatchedCalleesNode;
2825 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2828 if (!UsedOrigNode) {
2831 Node->MatchingCalls.clear();
2832 UsedOrigNode =
true;
2835 createNewNode(
false, NodeToCallingFunc[
Node]);
2836 assert(!Info->Calls.empty());
2839 Info->Node->setCall(Info->Calls.front());
2845 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2847 CallerNodeToUse = Info->Node;
2851 if (CallerNodeToUse ==
Node)
2854 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2861 for (
auto &
I : CalleeNodeToCallInfo)
2862 removeNoneTypeCallerEdges(
I.second->Node);
2863 if (UnmatchedCalleesNode)
2864 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2865 removeNoneTypeCallerEdges(
Node);
2875uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2878 return Index.getStackIdAtIndex(IdOrIndex);
2881template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2882bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2883 CallTy
Call, EdgeIter &EI,
2884 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2886 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2887 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2890 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2891 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2896 if (FoundCalleeChain.empty())
2900 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2904 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2905 CurEdge->AllocTypes |=
Edge->AllocTypes;
2910 auto NewEdge = std::make_shared<ContextEdge>(
2911 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2912 Callee->CallerEdges.push_back(NewEdge);
2913 if (Caller ==
Edge->Caller) {
2917 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2920 "Iterator position not restored after insert and increment");
2922 Caller->CalleeEdges.push_back(NewEdge);
2927 auto *CurCalleeNode =
Edge->Callee;
2928 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2929 ContextNode *NewNode =
nullptr;
2931 if (TailCallToContextNodeMap.
count(NewCall)) {
2932 NewNode = TailCallToContextNodeMap[NewCall];
2933 NewNode->AllocTypes |=
Edge->AllocTypes;
2935 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2937 NewNode = createNewNode(
false, Func, NewCall);
2938 TailCallToContextNodeMap[NewCall] = NewNode;
2939 NewNode->AllocTypes =
Edge->AllocTypes;
2943 AddEdge(NewNode, CurCalleeNode);
2945 CurCalleeNode = NewNode;
2949 AddEdge(
Edge->Caller, CurCalleeNode);
2957 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2969bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2970 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2971 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2972 bool &FoundMultipleCalleeChains) {
2979 FoundCalleeChain.push_back({Callsite,
F});
2994 bool FoundSingleCalleeChain =
false;
2995 for (
auto &BB : *CalleeFunc) {
2996 for (
auto &
I : BB) {
2998 if (!CB || !CB->isTailCall())
3000 auto *CalledValue = CB->getCalledOperand();
3001 auto *CalledFunction = CB->getCalledFunction();
3002 if (CalledValue && !CalledFunction) {
3003 CalledValue = CalledValue->stripPointerCasts();
3010 assert(!CalledFunction &&
3011 "Expected null called function in callsite for alias");
3014 if (!CalledFunction)
3016 if (CalledFunction == ProfiledCallee) {
3017 if (FoundSingleCalleeChain) {
3018 FoundMultipleCalleeChains =
true;
3021 FoundSingleCalleeChain =
true;
3022 FoundProfiledCalleeCount++;
3023 FoundProfiledCalleeDepth +=
Depth;
3024 if (
Depth > FoundProfiledCalleeMaxDepth)
3025 FoundProfiledCalleeMaxDepth =
Depth;
3026 SaveCallsiteInfo(&
I, CalleeFunc);
3027 }
else if (findProfiledCalleeThroughTailCalls(
3028 ProfiledCallee, CalledFunction,
Depth + 1,
3029 FoundCalleeChain, FoundMultipleCalleeChains)) {
3032 assert(!FoundMultipleCalleeChains);
3033 if (FoundSingleCalleeChain) {
3034 FoundMultipleCalleeChains =
true;
3037 FoundSingleCalleeChain =
true;
3038 SaveCallsiteInfo(&
I, CalleeFunc);
3039 }
else if (FoundMultipleCalleeChains)
3044 return FoundSingleCalleeChain;
3047const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
3049 if (!CB->getCalledOperand() || CB->isIndirectCall())
3051 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3058bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3059 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3060 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3062 if (!CB->getCalledOperand() || CB->isIndirectCall())
3064 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3066 if (CalleeFunc == Func)
3069 if (Alias && Alias->getAliasee() == Func)
3080 bool FoundMultipleCalleeChains =
false;
3081 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3083 FoundMultipleCalleeChains)) {
3084 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3085 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3086 <<
" that actually called " << CalleeVal->getName()
3087 << (FoundMultipleCalleeChains
3088 ?
" (found multiple possible chains)"
3091 if (FoundMultipleCalleeChains)
3092 FoundProfiledCalleeNonUniquelyCount++;
3099bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3100 Instruction *Call2) {
3102 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3104 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3107 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3109 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3111 return CalleeFunc1 == CalleeFunc2;
3114bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3115 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3116 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3117 bool &FoundMultipleCalleeChains) {
3123 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3126 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3127 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3130 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3131 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3132 CallsiteInfo *NewCallsiteInfo =
3133 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3134 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3141 bool FoundSingleCalleeChain =
false;
3144 !isPrevailing(CurCallee.
getGUID(), S.get()))
3149 auto FSVI = CurCallee;
3152 FSVI = AS->getAliaseeVI();
3153 for (
auto &CallEdge :
FS->calls()) {
3154 if (!CallEdge.second.hasTailCall())
3156 if (CallEdge.first == ProfiledCallee) {
3157 if (FoundSingleCalleeChain) {
3158 FoundMultipleCalleeChains =
true;
3161 FoundSingleCalleeChain =
true;
3162 FoundProfiledCalleeCount++;
3163 FoundProfiledCalleeDepth +=
Depth;
3164 if (
Depth > FoundProfiledCalleeMaxDepth)
3165 FoundProfiledCalleeMaxDepth =
Depth;
3166 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3168 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3169 FSToVIMap[
FS] = FSVI;
3170 }
else if (findProfiledCalleeThroughTailCalls(
3171 ProfiledCallee, CallEdge.first,
Depth + 1,
3172 FoundCalleeChain, FoundMultipleCalleeChains)) {
3175 assert(!FoundMultipleCalleeChains);
3176 if (FoundSingleCalleeChain) {
3177 FoundMultipleCalleeChains =
true;
3180 FoundSingleCalleeChain =
true;
3181 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3183 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3184 FSToVIMap[
FS] = FSVI;
3185 }
else if (FoundMultipleCalleeChains)
3190 return FoundSingleCalleeChain;
3193const FunctionSummary *
3194IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3196 if (
Callee.getSummaryList().empty())
3201bool IndexCallsiteContextGraph::calleeMatchesFunc(
3202 IndexCall &
Call,
const FunctionSummary *Func,
3203 const FunctionSummary *CallerFunc,
3204 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3208 AliasSummary *Alias =
3209 Callee.getSummaryList().empty()
3212 assert(FSToVIMap.count(Func));
3213 auto FuncVI = FSToVIMap[
Func];
3214 if (Callee == FuncVI ||
3229 bool FoundMultipleCalleeChains =
false;
3230 if (!findProfiledCalleeThroughTailCalls(
3231 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3232 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3233 <<
" from " << FSToVIMap[CallerFunc]
3234 <<
" that actually called " << Callee
3235 << (FoundMultipleCalleeChains
3236 ?
" (found multiple possible chains)"
3239 if (FoundMultipleCalleeChains)
3240 FoundProfiledCalleeNonUniquelyCount++;
3247bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3250 return Callee1 == Callee2;
3253template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3254void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3261void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3262 raw_ostream &OS)
const {
3263 OS <<
"Node " <<
this <<
"\n";
3267 OS <<
" (recursive)";
3269 if (!MatchingCalls.empty()) {
3270 OS <<
"\tMatchingCalls:\n";
3271 for (
auto &MatchingCall : MatchingCalls) {
3273 MatchingCall.print(OS);
3277 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3279 OS <<
"\tContextIds:";
3281 auto ContextIds = getContextIds();
3282 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3283 std::sort(SortedIds.begin(), SortedIds.end());
3284 for (
auto Id : SortedIds)
3287 OS <<
"\tCalleeEdges:\n";
3288 for (
auto &
Edge : CalleeEdges)
3289 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3291 OS <<
"\tCallerEdges:\n";
3292 for (
auto &
Edge : CallerEdges)
3293 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3295 if (!Clones.empty()) {
3298 for (
auto *
C : Clones)
3299 OS <<
LS <<
C <<
" NodeId: " <<
C->NodeId;
3301 }
else if (CloneOf) {
3302 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3306template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3307void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3313template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3314void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3315 raw_ostream &OS)
const {
3316 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3317 << (IsBackedge ?
" (BE)" :
"")
3319 OS <<
" ContextIds:";
3320 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3321 std::sort(SortedIds.begin(), SortedIds.end());
3322 for (
auto Id : SortedIds)
3326template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3327void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3331template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3332void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3333 raw_ostream &OS)
const {
3334 OS <<
"Callsite Context Graph:\n";
3335 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3337 if (
Node->isRemoved())
3344template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3345void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3347 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark)
const {
3348 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3350 if (
Node->isRemoved())
3352 if (!
Node->IsAllocation)
3354 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3355 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3356 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3357 std::sort(SortedIds.begin(), SortedIds.end());
3358 for (
auto Id : SortedIds) {
3359 auto TypeI = ContextIdToAllocationType.find(Id);
3360 assert(TypeI != ContextIdToAllocationType.end());
3361 auto CSI = ContextIdToContextSizeInfos.find(Id);
3362 if (CSI != ContextIdToContextSizeInfos.end()) {
3363 for (
auto &Info : CSI->second) {
3366 " full allocation context " + std::to_string(
Info.FullStackId) +
3367 " with total size " + std::to_string(
Info.TotalSize) +
" is " +
3369 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3371 " due to cold byte percent";
3373 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3377 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
3385 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3387 " due to cold byte percent";
3389 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3393 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
3399template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3400void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3401 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3404 for (
auto &
Edge :
Node->CallerEdges)
3409template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3411 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3412 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3414 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3430 return G->NodeOwner.begin()->get();
3433 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3434 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3453template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3467 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3473 std::string LabelString =
3474 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3477 LabelString +=
"\n";
3478 if (
Node->hasCall()) {
3479 auto Func =
G->NodeToCallingFunc.find(
Node);
3480 assert(Func !=
G->NodeToCallingFunc.end());
3482 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3483 for (
auto &MatchingCall :
Node->MatchingCalls) {
3484 LabelString +=
"\n";
3485 LabelString +=
G->getLabel(Func->second, MatchingCall.call(),
3486 MatchingCall.cloneNo());
3489 LabelString +=
"null call";
3490 if (
Node->Recursive)
3491 LabelString +=
" (recursive)";
3493 LabelString +=
" (external)";
3499 auto ContextIds =
Node->getContextIds();
3503 bool Highlight =
false;
3512 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3513 getContextIds(ContextIds) +
"\"")
3517 AttributeString +=
",fontsize=\"30\"";
3519 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3521 if (
Node->CloneOf) {
3522 AttributeString +=
",color=\"blue\"";
3523 AttributeString +=
",style=\"filled,bold,dashed\"";
3525 AttributeString +=
",style=\"filled\"";
3526 return AttributeString;
3531 auto &Edge = *(ChildIter.getCurrent());
3536 bool Highlight =
false;
3545 auto Color = getColor(Edge->AllocTypes, Highlight);
3546 std::string AttributeString =
3547 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3549 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3552 if (Edge->IsBackedge)
3553 AttributeString +=
",style=\"dotted\"";
3556 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3557 return AttributeString;
3563 if (
Node->isRemoved())
3576 std::string IdString =
"ContextIds:";
3577 if (ContextIds.
size() < 100) {
3578 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3579 std::sort(SortedIds.begin(), SortedIds.end());
3580 for (
auto Id : SortedIds)
3581 IdString += (
" " +
Twine(Id)).str();
3583 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3588 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3594 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3596 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3597 if (AllocTypes == (uint8_t)AllocationType::Cold)
3598 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3600 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3601 return Highlight ?
"magenta" :
"mediumorchid1";
3605 static std::string getNodeId(NodeRef Node) {
3606 std::stringstream SStream;
3607 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3608 std::string
Result = SStream.str();
3617template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3622template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3623void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3624 std::string Label)
const {
3629template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3630typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3631CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3632 const std::shared_ptr<ContextEdge> &
Edge,
3633 DenseSet<uint32_t> ContextIdsToMove) {
3635 assert(NodeToCallingFunc.count(Node));
3636 ContextNode *Clone =
3637 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3638 Node->addClone(Clone);
3639 Clone->MatchingCalls =
Node->MatchingCalls;
3640 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3645template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3646void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3647 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3648 ContextNode *NewCallee,
bool NewClone,
3649 DenseSet<uint32_t> ContextIdsToMove) {
3652 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3654 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3656 ContextNode *OldCallee =
Edge->Callee;
3660 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3664 if (ContextIdsToMove.
empty())
3665 ContextIdsToMove =
Edge->getContextIds();
3669 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3672 NewCallee->AllocTypes |=
Edge->AllocTypes;
3674 if (ExistingEdgeToNewCallee) {
3677 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3678 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3679 assert(
Edge->ContextIds == ContextIdsToMove);
3680 removeEdgeFromGraph(
Edge.get());
3683 Edge->Callee = NewCallee;
3684 NewCallee->CallerEdges.push_back(
Edge);
3686 OldCallee->eraseCallerEdge(
Edge.get());
3693 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3694 if (ExistingEdgeToNewCallee) {
3697 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3698 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3701 auto NewEdge = std::make_shared<ContextEdge>(
3702 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3703 Edge->Caller->CalleeEdges.push_back(NewEdge);
3704 NewCallee->CallerEdges.push_back(NewEdge);
3708 NewCallee->AllocTypes |= CallerEdgeAllocType;
3710 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3715 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3716 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3720 if (CalleeToUse == OldCallee) {
3724 if (EdgeIsRecursive) {
3728 CalleeToUse = NewCallee;
3732 DenseSet<uint32_t> EdgeContextIdsToMove =
3734 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3735 OldCalleeEdge->AllocTypes =
3736 computeAllocType(OldCalleeEdge->getContextIds());
3743 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3744 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3745 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3749 auto NewEdge = std::make_shared<ContextEdge>(
3750 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3751 EdgeContextIdsToMove);
3752 NewCallee->CalleeEdges.push_back(NewEdge);
3753 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3757 OldCallee->AllocTypes = OldCallee->computeAllocType();
3759 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3760 OldCallee->emptyContextIds());
3764 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3767 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3773template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3774void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3775 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3776 ContextNode *NewCaller) {
3777 auto *OldCallee =
Edge->Callee;
3778 auto *NewCallee = OldCallee;
3781 bool Recursive =
Edge->Caller ==
Edge->Callee;
3783 NewCallee = NewCaller;
3785 ContextNode *OldCaller =
Edge->Caller;
3786 OldCaller->eraseCalleeEdge(
Edge.get());
3790 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3792 if (ExistingEdgeToNewCaller) {
3795 ExistingEdgeToNewCaller->getContextIds().insert_range(
3796 Edge->getContextIds());
3797 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3798 Edge->ContextIds.clear();
3799 Edge->AllocTypes = (uint8_t)AllocationType::None;
3800 OldCallee->eraseCallerEdge(
Edge.get());
3803 Edge->Caller = NewCaller;
3804 NewCaller->CalleeEdges.push_back(
Edge);
3806 assert(NewCallee == NewCaller);
3809 Edge->Callee = NewCallee;
3810 NewCallee->CallerEdges.push_back(
Edge);
3811 OldCallee->eraseCallerEdge(
Edge.get());
3817 NewCaller->AllocTypes |=
Edge->AllocTypes;
3824 bool IsNewNode = NewCaller->CallerEdges.empty();
3833 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3834 auto OldCallerCaller = OldCallerEdge->Caller;
3838 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3839 if (OldCaller == OldCallerCaller) {
3840 OldCallerCaller = NewCaller;
3846 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3847 OldCallerEdge->AllocTypes =
3848 computeAllocType(OldCallerEdge->getContextIds());
3853 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3857 if (ExistingCallerEdge) {
3858 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3859 ExistingCallerEdge->AllocTypes |=
3860 computeAllocType(EdgeContextIdsToMove);
3863 auto NewEdge = std::make_shared<ContextEdge>(
3864 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3865 EdgeContextIdsToMove);
3866 NewCaller->CallerEdges.push_back(NewEdge);
3867 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3872 OldCaller->AllocTypes = OldCaller->computeAllocType();
3874 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3875 OldCaller->emptyContextIds());
3879 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3882 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3888template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3889void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3890 recursivelyRemoveNoneTypeCalleeEdges(
3891 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3896 removeNoneTypeCalleeEdges(Node);
3898 for (
auto *Clone :
Node->Clones)
3899 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3903 auto CallerEdges =
Node->CallerEdges;
3904 for (
auto &
Edge : CallerEdges) {
3906 if (
Edge->isRemoved()) {
3910 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3915template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3916void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3921 DenseSet<const ContextNode *> Visited;
3922 DenseSet<const ContextNode *> CurrentStack;
3923 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3925 if (
Node->isRemoved())
3928 if (!
Node->CallerEdges.empty())
3930 markBackedges(Node, Visited, CurrentStack);
3936template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3937void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3938 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3939 DenseSet<const ContextNode *> &CurrentStack) {
3940 auto I = Visited.
insert(Node);
3944 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3945 auto *
Callee = CalleeEdge->Callee;
3946 if (Visited.
count(Callee)) {
3949 if (CurrentStack.
count(Callee))
3950 CalleeEdge->IsBackedge =
true;
3953 CurrentStack.
insert(Callee);
3954 markBackedges(Callee, Visited, CurrentStack);
3955 CurrentStack.
erase(Callee);
3959template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3960void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3961 DenseSet<const ContextNode *> Visited;
3962 for (
auto &Entry : AllocationCallToContextNodeMap) {
3964 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3967 for (
auto &Entry : AllocationCallToContextNodeMap)
3968 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3981template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3982void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3983 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3984 const DenseSet<uint32_t> &AllocContextIds) {
3994 if (!
Node->hasCall())
4013 auto CallerEdges =
Node->CallerEdges;
4014 for (
auto &
Edge : CallerEdges) {
4016 if (
Edge->isRemoved()) {
4022 if (
Edge->IsBackedge) {
4029 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
4030 identifyClones(
Edge->Caller, Visited, AllocContextIds);
4053 const unsigned AllocTypeCloningPriority[] = { 3, 4,
4057 [&](
const std::shared_ptr<ContextEdge> &
A,
4058 const std::shared_ptr<ContextEdge> &
B) {
4061 if (A->ContextIds.empty())
4067 if (B->ContextIds.empty())
4070 if (A->AllocTypes == B->AllocTypes)
4073 return *A->ContextIds.begin() < *B->ContextIds.begin();
4074 return AllocTypeCloningPriority[A->AllocTypes] <
4075 AllocTypeCloningPriority[B->AllocTypes];
4078 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4080 DenseSet<uint32_t> RecursiveContextIds;
4085 DenseSet<uint32_t> AllCallerContextIds;
4086 for (
auto &CE :
Node->CallerEdges) {
4089 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4090 for (
auto Id :
CE->getContextIds())
4091 if (!AllCallerContextIds.
insert(Id).second)
4092 RecursiveContextIds.
insert(Id);
4102 auto CallerEdges =
Node->CallerEdges;
4103 for (
auto &CallerEdge : CallerEdges) {
4105 if (CallerEdge->isRemoved()) {
4109 assert(CallerEdge->Callee == Node);
4118 if (!CallerEdge->Caller->hasCall())
4123 auto CallerEdgeContextsForAlloc =
4125 if (!RecursiveContextIds.
empty())
4126 CallerEdgeContextsForAlloc =
4128 if (CallerEdgeContextsForAlloc.empty())
4131 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4135 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4136 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4137 for (
auto &CalleeEdge :
Node->CalleeEdges)
4138 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4139 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4155 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4156 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4157 if (!CallerEdge->IsBackedge &&
4158 allocTypeToUse(CallerAllocTypeForAlloc) ==
4159 allocTypeToUse(
Node->AllocTypes) &&
4160 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4161 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4165 if (CallerEdge->IsBackedge) {
4169 DeferredBackedges++;
4182 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4183 !Visited.
count(CallerEdge->Caller)) {
4184 const auto OrigIdCount = CallerEdge->getContextIds().size();
4187 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4188 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4192 bool UpdatedEdge =
false;
4193 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4194 for (
auto E :
Node->CallerEdges) {
4196 if (
E->Caller->CloneOf != CallerEdge->Caller)
4200 auto CallerEdgeContextsForAllocNew =
4202 if (CallerEdgeContextsForAllocNew.empty())
4212 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4222 if (CallerEdge->isRemoved())
4232 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4233 if (CallerEdgeContextsForAlloc.empty())
4238 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4239 CalleeEdgeAllocTypesForCallerEdge.clear();
4240 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4241 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4242 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4248 ContextNode *Clone =
nullptr;
4249 for (
auto *CurClone :
Node->Clones) {
4250 if (allocTypeToUse(CurClone->AllocTypes) !=
4251 allocTypeToUse(CallerAllocTypeForAlloc))
4258 assert(!BothSingleAlloc ||
4259 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4265 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4266 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4274 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4275 CallerEdgeContextsForAlloc);
4277 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4280 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4287 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4293void ModuleCallsiteContextGraph::updateAllocationCall(
4298 "memprof", AllocTypeString);
4301 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4302 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4304 <<
" marked with memprof allocation attribute "
4305 <<
ore::NV(
"Attribute", AllocTypeString));
4308void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4312 assert(AI->Versions.size() >
Call.cloneNo());
4317ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4319 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4320 return AllocationType::None;
4321 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4322 ? AllocationType::Cold
4323 : AllocationType::NotCold;
4327IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4329 assert(AI->Versions.size() >
Call.cloneNo());
4333void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4334 FuncInfo CalleeFunc) {
4335 auto *CurF = getCalleeFunc(CallerCall.call());
4336 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4343 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4345 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4347 MismatchedCloneAssignments++;
4350 if (NewCalleeCloneNo > 0)
4351 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4352 OREGetter(CallerCall.call()->getFunction())
4353 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4354 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4355 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4356 <<
" assigned to call function clone "
4357 <<
ore::NV(
"Callee", CalleeFunc.func()));
4360void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4361 FuncInfo CalleeFunc) {
4364 "Caller cannot be an allocation which should not have profiled calls");
4365 assert(CI->Clones.size() > CallerCall.cloneNo());
4366 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4367 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4372 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4374 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4376 MismatchedCloneAssignments++;
4378 CurCalleeCloneNo = NewCalleeCloneNo;
4390 SP->replaceLinkageName(MDName);
4394 TempDISubprogram NewDecl = Decl->
clone();
4395 NewDecl->replaceLinkageName(MDName);
4399CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4401ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4402 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4403 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4408 assert(!
Func.func()->getParent()->getFunction(Name));
4409 NewFunc->setName(Name);
4411 for (
auto &Inst : CallsWithMetadataInFunc) {
4413 assert(Inst.cloneNo() == 0);
4416 OREGetter(
Func.func())
4417 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4418 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4419 return {NewFunc, CloneNo};
4422CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4423 IndexCall>::FuncInfo
4424IndexCallsiteContextGraph::cloneFunctionForCallsite(
4425 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4426 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4440 for (
auto &Inst : CallsWithMetadataInFunc) {
4442 assert(Inst.cloneNo() == 0);
4444 assert(AI->Versions.size() == CloneNo);
4447 AI->Versions.push_back(0);
4450 assert(CI && CI->Clones.size() == CloneNo);
4453 CI->Clones.push_back(0);
4455 CallMap[Inst] = {Inst.call(), CloneNo};
4457 return {
Func.func(), CloneNo};
4474template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4475void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4481 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4482 for (
auto &Entry : AllocationCallToContextNodeMap) {
4484 for (
auto Id :
Node->getContextIds())
4485 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4486 for (
auto *Clone :
Node->Clones) {
4487 for (
auto Id : Clone->getContextIds())
4488 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4495 DenseSet<const ContextNode *> Visited;
4496 for (
auto &Entry : AllocationCallToContextNodeMap) {
4499 mergeClones(Node, Visited, ContextIdToAllocationNode);
4505 auto Clones =
Node->Clones;
4506 for (
auto *Clone : Clones)
4507 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4511 dbgs() <<
"CCG after merging:\n";
4515 exportToDot(
"aftermerge");
4523template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4524void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4525 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4526 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4536 bool FoundUnvisited =
true;
4538 while (FoundUnvisited) {
4540 FoundUnvisited =
false;
4543 auto CallerEdges =
Node->CallerEdges;
4544 for (
auto CallerEdge : CallerEdges) {
4546 if (CallerEdge->Callee != Node)
4551 FoundUnvisited =
true;
4552 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4556 TotalMergeInvokes++;
4557 TotalMergeIters += Iters;
4558 if (Iters > MaxMergeIters)
4559 MaxMergeIters = Iters;
4562 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4565template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4566void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4567 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4568 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4570 if (
Node->emptyContextIds())
4575 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4576 OrigNodeToCloneEdges;
4577 for (
const auto &
E :
Node->CalleeEdges) {
4582 OrigNodeToCloneEdges[
Base].push_back(
E);
4588 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4589 const std::shared_ptr<ContextEdge> &
B) {
4590 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4591 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4592 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4594 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4598 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4603 for (
auto Entry : OrigNodeToCloneEdges) {
4606 auto &CalleeEdges =
Entry.second;
4607 auto NumCalleeClones = CalleeEdges.size();
4609 if (NumCalleeClones == 1)
4620 DenseSet<ContextNode *> OtherCallersToShareMerge;
4621 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4622 OtherCallersToShareMerge);
4627 ContextNode *MergeNode =
nullptr;
4628 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4629 for (
auto CalleeEdge : CalleeEdges) {
4630 auto *OrigCallee = CalleeEdge->Callee;
4636 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4637 MergeNode = OrigCallee;
4638 NonNewMergedNodes++;
4645 if (!OtherCallersToShareMerge.
empty()) {
4646 bool MoveAllCallerEdges =
true;
4647 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4648 if (CalleeCallerE == CalleeEdge)
4650 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4651 MoveAllCallerEdges =
false;
4657 if (MoveAllCallerEdges) {
4658 MergeNode = OrigCallee;
4659 NonNewMergedNodes++;
4666 assert(MergeNode != OrigCallee);
4667 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4670 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4675 if (!OtherCallersToShareMerge.
empty()) {
4679 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4680 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4681 if (CalleeCallerE == CalleeEdge)
4683 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4685 CallerToMoveCount[CalleeCallerE->Caller]++;
4686 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4690 removeNoneTypeCalleeEdges(OrigCallee);
4691 removeNoneTypeCalleeEdges(MergeNode);
4709template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4710void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4711 findOtherCallersToShareMerge(
4713 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4714 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4715 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4716 auto NumCalleeClones = CalleeEdges.size();
4719 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4722 unsigned PossibleOtherCallerNodes = 0;
4726 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4732 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4733 for (
auto CalleeEdge : CalleeEdges) {
4734 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4737 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4738 if (CalleeCallerEdges->Caller == Node) {
4739 assert(CalleeCallerEdges == CalleeEdge);
4742 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4745 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4747 PossibleOtherCallerNodes++;
4751 for (
auto Id : CalleeEdge->getContextIds()) {
4752 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4756 MissingAllocForContextId++;
4759 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4766 for (
auto CalleeEdge : CalleeEdges) {
4767 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4769 if (!PossibleOtherCallerNodes)
4771 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4773 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4775 if (CalleeCallerE == CalleeEdge)
4779 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4784 for (
auto Id : CalleeCallerE->getContextIds()) {
4785 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4790 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4791 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4792 PossibleOtherCallerNodes--;
4799 if (!PossibleOtherCallerNodes)
4804 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4805 if (
Count != NumCalleeClones)
4807 OtherCallersToShareMerge.
insert(OtherCaller);
4852template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4853bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4860 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4864 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4865 const FuncInfo &CalleeFunc) {
4867 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4871 struct FuncCloneInfo {
4876 DenseMap<CallInfo, CallInfo> CallMap;
4904 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4905 UnassignedCallClones;
4909 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4910 FuncInfo OrigFunc(Func);
4915 std::vector<FuncCloneInfo> FuncCloneInfos;
4916 for (
auto &
Call : CallsWithMetadata) {
4917 ContextNode *
Node = getNodeForInst(
Call);
4921 if (!Node ||
Node->Clones.empty())
4924 "Not having a call should have prevented cloning");
4928 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4932 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4934 ContextNode *CallsiteClone,
4937 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4939 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4940 DenseMap<CallInfo, CallInfo> &CallMap =
4941 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4942 CallInfo CallClone(
Call);
4943 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4944 CallClone = It->second;
4945 CallsiteClone->setCall(CallClone);
4947 for (
auto &MatchingCall :
Node->MatchingCalls) {
4948 CallInfo CallClone(MatchingCall);
4949 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4950 CallClone = It->second;
4952 MatchingCall = CallClone;
4960 auto MoveEdgeToNewCalleeCloneAndSetUp =
4961 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4962 ContextNode *OrigCallee =
Edge->Callee;
4963 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4964 removeNoneTypeCalleeEdges(NewClone);
4965 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4969 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4970 RecordCalleeFuncOfCallsite(
4971 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4978 std::deque<ContextNode *> ClonesWorklist;
4980 if (!
Node->emptyContextIds())
4981 ClonesWorklist.push_back(Node);
4987 unsigned NodeCloneCount = 0;
4988 while (!ClonesWorklist.empty()) {
4989 ContextNode *Clone = ClonesWorklist.front();
4990 ClonesWorklist.pop_front();
4999 if (FuncCloneInfos.size() < NodeCloneCount) {
5001 if (NodeCloneCount == 1) {
5006 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5007 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5011 FuncCloneInfos.push_back(
5012 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
5013 AssignCallsiteCloneToFuncClone(
5014 OrigFunc,
Call, Clone,
5015 AllocationCallToContextNodeMap.count(
Call));
5016 for (
auto &CE : Clone->CallerEdges) {
5018 if (!
CE->Caller->hasCall())
5020 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
5030 FuncInfo PreviousAssignedFuncClone;
5032 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5033 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5035 bool CallerAssignedToCloneOfFunc =
false;
5036 if (EI != Clone->CallerEdges.end()) {
5037 const std::shared_ptr<ContextEdge> &
Edge = *EI;
5038 PreviousAssignedFuncClone =
5039 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5040 CallerAssignedToCloneOfFunc =
true;
5045 DenseMap<CallInfo, CallInfo> NewCallMap;
5046 unsigned CloneNo = FuncCloneInfos.size();
5047 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
5048 "should already exist in the map");
5049 FuncInfo NewFuncClone = cloneFunctionForCallsite(
5050 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
5051 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
5052 FunctionClonesAnalysis++;
5058 if (!CallerAssignedToCloneOfFunc) {
5059 AssignCallsiteCloneToFuncClone(
5060 NewFuncClone,
Call, Clone,
5061 AllocationCallToContextNodeMap.count(
Call));
5062 for (
auto &CE : Clone->CallerEdges) {
5064 if (!
CE->Caller->hasCall())
5066 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5078 auto CallerEdges = Clone->CallerEdges;
5079 for (
auto CE : CallerEdges) {
5081 if (
CE->isRemoved()) {
5087 if (!
CE->Caller->hasCall())
5090 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5094 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5095 PreviousAssignedFuncClone)
5098 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5111 auto CalleeEdges =
CE->Caller->CalleeEdges;
5112 for (
auto CalleeEdge : CalleeEdges) {
5115 if (CalleeEdge->isRemoved()) {
5120 ContextNode *
Callee = CalleeEdge->Callee;
5124 if (Callee == Clone || !
Callee->hasCall())
5129 if (Callee == CalleeEdge->Caller)
5131 ContextNode *NewClone =
5132 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5135 removeNoneTypeCalleeEdges(Callee);
5143 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5144 OrigCall.setCloneNo(0);
5145 DenseMap<CallInfo, CallInfo> &CallMap =
5146 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5148 CallInfo NewCall(CallMap[OrigCall]);
5150 NewClone->setCall(NewCall);
5152 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5153 CallInfo OrigMatchingCall(MatchingCall);
5154 OrigMatchingCall.setCloneNo(0);
5156 CallInfo NewCall(CallMap[OrigMatchingCall]);
5159 MatchingCall = NewCall;
5168 auto FindFirstAvailFuncClone = [&]() {
5173 for (
auto &CF : FuncCloneInfos) {
5174 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5175 return CF.FuncClone;
5178 "Expected an available func clone for this callsite clone");
5195 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5196 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5200 auto CloneCallerEdges = Clone->CallerEdges;
5201 for (
auto &
Edge : CloneCallerEdges) {
5205 if (
Edge->isRemoved())
5208 if (!
Edge->Caller->hasCall())
5212 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5213 FuncInfo FuncCloneCalledByCaller =
5214 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5224 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5225 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5233 (FuncCloneAssignedToCurCallsiteClone &&
5234 FuncCloneAssignedToCurCallsiteClone !=
5235 FuncCloneCalledByCaller)) {
5250 if (FuncCloneToNewCallsiteCloneMap.count(
5251 FuncCloneCalledByCaller)) {
5252 ContextNode *NewClone =
5253 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5254 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5256 removeNoneTypeCalleeEdges(NewClone);
5259 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5260 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5263 ClonesWorklist.push_back(NewClone);
5267 removeNoneTypeCalleeEdges(Clone);
5275 if (!FuncCloneAssignedToCurCallsiteClone) {
5276 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5278 AssignCallsiteCloneToFuncClone(
5279 FuncCloneCalledByCaller,
Call, Clone,
5280 AllocationCallToContextNodeMap.count(
Call));
5284 assert(FuncCloneAssignedToCurCallsiteClone ==
5285 FuncCloneCalledByCaller);
5294 if (!FuncCloneAssignedToCurCallsiteClone) {
5295 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5296 assert(FuncCloneAssignedToCurCallsiteClone);
5298 AssignCallsiteCloneToFuncClone(
5299 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5300 AllocationCallToContextNodeMap.count(
Call));
5302 assert(FuncCloneToCurNodeCloneMap
5303 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5305 RecordCalleeFuncOfCallsite(
Edge->Caller,
5306 FuncCloneAssignedToCurCallsiteClone);
5326 if (!FuncCloneAssignedToCurCallsiteClone) {
5327 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5328 assert(FuncCloneAssignedToCurCallsiteClone &&
5329 "No available func clone for this callsite clone");
5330 AssignCallsiteCloneToFuncClone(
5331 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5332 AllocationCallToContextNodeMap.contains(
Call));
5337 for (
const auto &PE :
Node->CalleeEdges)
5339 for (
const auto &CE :
Node->CallerEdges)
5341 for (
auto *Clone :
Node->Clones) {
5343 for (
const auto &PE : Clone->CalleeEdges)
5345 for (
const auto &CE : Clone->CallerEdges)
5351 if (FuncCloneInfos.size() < 2)
5357 for (
auto &
Call : CallsWithMetadata) {
5358 ContextNode *
Node = getNodeForInst(
Call);
5359 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5365 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5369 DenseSet<unsigned> NodeCallClones;
5370 for (
auto *
C :
Node->Clones)
5371 NodeCallClones.
insert(
C->Call.cloneNo());
5374 for (
auto &FC : FuncCloneInfos) {
5379 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5384 auto &CallVector = UnassignedCallClones[
Node][
I];
5385 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5386 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5387 CallInfo CallClone = It->second;
5388 CallVector.push_back(CallClone);
5392 assert(
false &&
"Expected to find call in CallMap");
5395 for (
auto &MatchingCall :
Node->MatchingCalls) {
5396 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5397 CallInfo CallClone = It->second;
5398 CallVector.push_back(CallClone);
5402 assert(
false &&
"Expected to find call in CallMap");
5410 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5412 auto UpdateCalls = [&](ContextNode *
Node,
5413 DenseSet<const ContextNode *> &Visited,
5414 auto &&UpdateCalls) {
5415 auto Inserted = Visited.insert(Node);
5419 for (
auto *Clone :
Node->Clones)
5420 UpdateCalls(Clone, Visited, UpdateCalls);
5422 for (
auto &
Edge :
Node->CallerEdges)
5423 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5427 if (!
Node->hasCall() ||
Node->emptyContextIds())
5430 if (
Node->IsAllocation) {
5431 auto AT = allocTypeToUse(
Node->AllocTypes);
5437 !ContextIdToContextSizeInfos.empty()) {
5438 uint64_t TotalCold = 0;
5440 for (
auto Id :
Node->getContextIds()) {
5441 auto TypeI = ContextIdToAllocationType.find(Id);
5442 assert(TypeI != ContextIdToAllocationType.end());
5443 auto CSI = ContextIdToContextSizeInfos.find(Id);
5444 if (CSI != ContextIdToContextSizeInfos.end()) {
5445 for (
auto &Info : CSI->second) {
5447 if (TypeI->second == AllocationType::Cold)
5448 TotalCold +=
Info.TotalSize;
5453 AT = AllocationType::Cold;
5455 updateAllocationCall(
Node->Call, AT);
5460 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5463 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5464 updateCall(
Node->Call, CalleeFunc);
5466 for (
auto &
Call :
Node->MatchingCalls)
5467 updateCall(
Call, CalleeFunc);
5471 if (!UnassignedCallClones.
contains(Node))
5473 DenseSet<unsigned> NodeCallClones;
5474 for (
auto *
C :
Node->Clones)
5475 NodeCallClones.
insert(
C->Call.cloneNo());
5477 auto &ClonedCalls = UnassignedCallClones[
Node];
5478 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5482 if (NodeCallClones.
contains(CloneNo))
5485 for (
auto &
Call : CallVector)
5486 updateCall(
Call, CalleeFunc);
5495 DenseSet<const ContextNode *> Visited;
5496 for (
auto &Entry : AllocationCallToContextNodeMap)
5497 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5508 for (
auto &SN : FS->callsites()) {
5513 SN.Clones.size() >
I &&
5514 "Callsite summary has fewer entries than other summaries in function");
5515 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5522 for (
auto &AN : FS->allocs()) {
5526 assert(AN.Versions.size() >
I &&
5527 "Alloc summary has fewer entries than other summaries in function");
5528 if (AN.Versions.size() <=
I ||
5545 NewGV->takeName(DeclGV);
5552 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5553 if (!FuncToAliasMap.count(&
F))
5555 for (
auto *
A : FuncToAliasMap[&
F]) {
5557 auto *PrevA = M.getNamedAlias(AliasName);
5559 A->getType()->getPointerAddressSpace(),
5560 A->getLinkage(), AliasName, NewF);
5561 NewA->copyAttributesFrom(
A);
5563 TakeDeclNameAndReplace(PrevA, NewA);
5572 FunctionsClonedThinBackend++;
5589 for (
unsigned I = 1;
I < NumClones;
I++) {
5590 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5597 FunctionCloneDuplicatesThinBackend++;
5598 auto *Func = HashToFunc[Hash];
5599 if (Func->hasAvailableExternallyLinkage()) {
5605 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5607 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5610 auto *PrevF = M.getFunction(Name);
5613 TakeDeclNameAndReplace(PrevF, Alias);
5615 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5618 CloneFuncAliases(Func,
I);
5622 HashToFunc[Hash] = NewF;
5623 FunctionClonesThinBackend++;
5626 for (
auto &BB : *NewF) {
5627 for (
auto &Inst : BB) {
5628 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5629 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5634 TakeDeclNameAndReplace(PrevF, NewF);
5636 NewF->setName(Name);
5639 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5642 CloneFuncAliases(NewF,
I);
5651 const Function *CallingFunc =
nullptr) {
5670 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5676 if (!SrcFileMD &&
F.isDeclaration()) {
5680 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5685 assert(SrcFileMD || OrigName ==
F.getName());
5687 StringRef SrcFile = M.getSourceFileName();
5699 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5700 F.getName().contains(
'.')) {
5701 OrigName =
F.getName().rsplit(
'.').first;
5710 assert(TheFnVI ||
F.isDeclaration());
5714bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5716 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5717 Symtab = std::make_unique<InstrProfSymtab>();
5728 if (
Error E = Symtab->create(M,
true,
false)) {
5729 std::string SymtabFailure =
toString(std::move(
E));
5730 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5743 auto MIBIter = AllocNode.
MIBs.begin();
5744 for (
auto &MDOp : MemProfMD->
operands()) {
5746 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5751 auto ContextIterBegin =
5755 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5757 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5762 if (LastStackContextId == *ContextIter)
5764 LastStackContextId = *ContextIter;
5765 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5775bool MemProfContextDisambiguation::applyImport(
Module &M) {
5782 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5784 for (
auto &
A :
M.aliases()) {
5785 auto *Aliasee =
A.getAliaseeObject();
5787 FuncToAliasMap[
F].insert(&
A);
5790 if (!initializeIndirectCallPromotionInfo(M))
5797 OptimizationRemarkEmitter ORE(&
F);
5800 bool ClonesCreated =
false;
5801 unsigned NumClonesCreated = 0;
5802 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5812 if (ClonesCreated) {
5813 assert(NumClonesCreated == NumClones);
5820 ClonesCreated =
true;
5821 NumClonesCreated = NumClones;
5824 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5825 Function *CalledFunction, FunctionSummary *
FS) {
5827 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5839 if (CalledFunction != CB->getCalledOperand() &&
5840 (!GA || CalledFunction != GA->getAliaseeObject())) {
5841 SkippedCallsCloning++;
5847 auto CalleeOrigName = CalledFunction->getName();
5848 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5851 if (J > 0 && VMaps[J - 1]->
empty())
5855 if (!StackNode.
Clones[J])
5857 auto NewF =
M.getOrInsertFunction(
5859 CalledFunction->getFunctionType());
5873 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5874 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5876 <<
" assigned to call function clone "
5877 <<
ore::NV(
"Callee", NewF.getCallee()));
5891 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5895 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5897 "enable-import-metadata is needed to emit thinlto_src_module");
5898 StringRef SrcModule =
5901 if (GVS->modulePath() == SrcModule) {
5902 GVSummary = GVS.get();
5927 if (
FS->allocs().empty() &&
FS->callsites().empty())
5930 auto SI =
FS->callsites().begin();
5931 auto AI =
FS->allocs().begin();
5936 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5939 for (
auto CallsiteIt =
FS->callsites().rbegin();
5940 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5941 auto &Callsite = *CallsiteIt;
5945 if (!Callsite.StackIdIndices.empty())
5947 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5956 for (
auto &BB :
F) {
5957 for (
auto &
I : BB) {
5963 auto *CalledValue = CB->getCalledOperand();
5964 auto *CalledFunction = CB->getCalledFunction();
5965 if (CalledValue && !CalledFunction) {
5966 CalledValue = CalledValue->stripPointerCasts();
5973 assert(!CalledFunction &&
5974 "Expected null called function in callsite for alias");
5978 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5979 I.getMetadata(LLVMContext::MD_callsite));
5980 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5986 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5987 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5988 ? AllocTypeColdThinBackend++
5989 : AllocTypeNotColdThinBackend++;
5990 OrigAllocsThinBackend++;
5991 AllocVersionsThinBackend++;
5992 if (!MaxAllocVersionsThinBackend)
5993 MaxAllocVersionsThinBackend = 1;
6000 auto &AllocNode = *(AI++);
6008 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
6010 OrigAllocsThinBackend++;
6011 AllocVersionsThinBackend += AllocNode.Versions.size();
6012 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
6013 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
6023 if (AllocNode.Versions.size() == 1 &&
6026 AllocationType::NotCold ||
6028 AllocationType::None);
6029 UnclonableAllocsThinBackend++;
6035 return Type == ((uint8_t)AllocationType::NotCold |
6036 (uint8_t)AllocationType::Cold);
6040 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
6043 if (J > 0 && VMaps[J - 1]->
empty())
6046 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
6049 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
6050 : AllocTypeNotColdThinBackend++;
6065 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
6066 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
6068 <<
" marked with memprof allocation attribute "
6069 <<
ore::NV(
"Attribute", AllocTypeString));
6071 }
else if (!CallsiteContext.empty()) {
6072 if (!CalledFunction) {
6076 assert(!CI || !CI->isInlineAsm());
6086 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6092 CloneFuncIfNeeded(NumClones, FS);
6097 assert(SI !=
FS->callsites().end());
6098 auto &StackNode = *(
SI++);
6104 for (
auto StackId : CallsiteContext) {
6106 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6112 CloneCallsite(StackNode, CB, CalledFunction, FS);
6114 }
else if (CB->isTailCall() && CalledFunction) {
6117 ValueInfo CalleeVI =
6119 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6120 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6121 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6122 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6129 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6139 for (
auto &BB :
F) {
6140 for (
auto &
I : BB) {
6143 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6144 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6152unsigned MemProfContextDisambiguation::recordICPInfo(
6157 uint32_t NumCandidates;
6158 uint64_t TotalCount;
6159 auto CandidateProfileData =
6160 ICallAnalysis->getPromotionCandidatesForInstruction(
6162 if (CandidateProfileData.empty())
6168 bool ICPNeeded =
false;
6169 unsigned NumClones = 0;
6170 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6171 for (
const auto &Candidate : CandidateProfileData) {
6173 auto CalleeValueInfo =
6175 ImportSummary->getValueInfo(Candidate.Value);
6178 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6180 auto &StackNode = *(
SI++);
6185 [](
unsigned CloneNo) { return CloneNo != 0; });
6195 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6196 TotalCount, CallsiteInfoStartIndex});
6200void MemProfContextDisambiguation::performICP(
6202 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6204 OptimizationRemarkEmitter &ORE) {
6211 for (
auto &Info : ICallAnalysisInfo) {
6214 auto TotalCount =
Info.TotalCount;
6215 unsigned NumClones = 0;
6218 for (
auto &Candidate :
Info.CandidateProfileData) {
6229 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6230 if (TargetFunction ==
nullptr ||
6238 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6239 <<
"Memprof cannot promote indirect call: target with md5sum "
6240 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6245 RemainingCandidates.
push_back(Candidate);
6250 const char *Reason =
nullptr;
6253 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6254 <<
"Memprof cannot promote indirect call to "
6255 <<
ore::NV(
"TargetFunction", TargetFunction)
6256 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6259 RemainingCandidates.
push_back(Candidate);
6268 CallBase *CBClone = CB;
6269 for (
unsigned J = 0; J < NumClones; J++) {
6272 if (J > 0 && VMaps[J - 1]->
empty())
6282 TotalCount, isSamplePGO, &ORE);
6283 auto *TargetToUse = TargetFunction;
6286 if (StackNode.
Clones[J]) {
6305 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6307 <<
" promoted and assigned to call function clone "
6308 <<
ore::NV(
"Callee", TargetToUse));
6312 TotalCount -= Candidate.Count;
6316 CallBase *CBClone = CB;
6317 for (
unsigned J = 0; J < NumClones; J++) {
6320 if (J > 0 && VMaps[J - 1]->
empty())
6326 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6329 if (TotalCount != 0)
6331 IPVK_IndirectCallTarget,
Info.NumCandidates);
6336template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6337bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
6338 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark,
6339 bool AllowExtraAnalysis) {
6341 dbgs() <<
"CCG before cloning:\n";
6345 exportToDot(
"postbuild");
6358 dbgs() <<
"CCG after cloning:\n";
6362 exportToDot(
"cloned");
6364 bool Changed = assignFunctions();
6367 dbgs() <<
"CCG after assigning function clones:\n";
6371 exportToDot(
"clonefuncassign");
6374 printTotalSizes(
errs(), EmitRemark);
6379bool MemProfContextDisambiguation::processModule(
6381 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6386 return applyImport(M);
6399 ModuleCallsiteContextGraph CCG(M, OREGetter);
6402 return CCG.process();
6407 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6412 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6416 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6420 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6421 "-memprof-dot-context-id");
6422 if (ImportSummary) {
6432 auto ReadSummaryFile =
6434 if (!ReadSummaryFile) {
6441 if (!ImportSummaryForTestingOrErr) {
6447 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6448 ImportSummary = ImportSummaryForTesting.get();
6457 if (!processModule(M, OREGetter))
6476 bool AllowExtraAnalysis =
6479 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6480 CCG.process(EmitRemark, AllowExtraAnalysis);
6495 for (
auto &BB :
F) {
6496 for (
auto &
I : BB) {
6500 if (CI->hasFnAttr(
"memprof")) {
6501 CI->removeFnAttr(
"memprof");
6504 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6505 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6511 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6512 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(0), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
bool isWeakForLinker() const
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
LLVM_ABI MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bits of the poi...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...