52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
115 "Number of aliasees prevailing in a different module than its alias");
120 cl::desc(
"Specify the path prefix of the MemProf dot files."));
124 cl::desc(
"Export graph to dot files."));
129 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
139 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
144 "Export only nodes with contexts feeding given "
145 "-memprof-dot-alloc-id"),
147 "Export only nodes with given -memprof-dot-context-id")));
151 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
152 "or to highlight if -memprof-dot-scope=all"));
156 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
157 "highlight otherwise"));
161 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
165 cl::desc(
"Perform verification checks on CallingContextGraph."));
169 cl::desc(
"Perform frequent verification checks on nodes."));
172 "memprof-import-summary",
173 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
179 cl::desc(
"Max depth to recursively search for missing "
180 "frames through tail calls."));
185 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
189 cl::desc(
"Allow cloning of contexts through recursive cycles"));
196 cl::desc(
"Merge clones before assigning functions"));
205 cl::desc(
"Allow cloning of contexts having recursive cycles"));
211 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
222 cl::desc(
"Linking with hot/cold operator new interfaces"));
227 "Require target function definition when promoting indirect calls"));
234 cl::desc(
"Number of largest cold contexts to consider important"));
238 cl::desc(
"Enables edge fixup for important contexts"));
260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
261class CallsiteContextGraph {
263 CallsiteContextGraph() =
default;
264 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
265 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
269 EmitRemark =
nullptr);
273 void identifyClones();
280 bool assignFunctions();
286 EmitRemark =
nullptr)
const;
289 const CallsiteContextGraph &CCG) {
295 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
297 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
299 void exportToDot(std::string Label)
const;
302 struct FuncInfo final
303 :
public std::pair<FuncTy *, unsigned > {
304 using Base = std::pair<FuncTy *, unsigned>;
306 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
307 explicit operator bool()
const {
return this->first !=
nullptr; }
308 FuncTy *func()
const {
return this->first; }
309 unsigned cloneNo()
const {
return this->second; }
313 struct CallInfo final :
public std::pair<CallTy, unsigned > {
314 using Base = std::pair<CallTy, unsigned>;
316 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
318 explicit operator bool()
const {
return (
bool)this->first; }
319 CallTy call()
const {
return this->first; }
320 unsigned cloneNo()
const {
return this->second; }
321 void setCloneNo(
unsigned N) { this->second =
N; }
323 if (!
operator bool()) {
329 OS <<
"\t(clone " << cloneNo() <<
")";
355 bool Recursive =
false;
382 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
386 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
390 bool useCallerEdgesForContextInfo()
const {
395 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
413 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
414 Count += Edge->getContextIds().size();
418 CalleeEdges, useCallerEdgesForContextInfo()
420 : std::vector<std::shared_ptr<ContextEdge>>());
421 for (
const auto &Edge : Edges)
428 uint8_t computeAllocType()
const {
433 CalleeEdges, useCallerEdgesForContextInfo()
435 : std::vector<std::shared_ptr<ContextEdge>>());
436 for (
const auto &Edge : Edges) {
447 bool emptyContextIds()
const {
449 CalleeEdges, useCallerEdgesForContextInfo()
451 : std::vector<std::shared_ptr<ContextEdge>>());
452 for (
const auto &Edge : Edges) {
453 if (!Edge->getContextIds().empty())
460 std::vector<ContextNode *> Clones;
463 ContextNode *CloneOf =
nullptr;
465 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
467 ContextNode(
bool IsAllocation, CallInfo
C)
468 : IsAllocation(IsAllocation),
Call(
C) {}
470 void addClone(ContextNode *Clone) {
472 CloneOf->Clones.push_back(Clone);
473 Clone->CloneOf = CloneOf;
475 Clones.push_back(Clone);
477 Clone->CloneOf =
this;
481 ContextNode *getOrigNode() {
488 unsigned int ContextId);
490 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
491 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
492 void eraseCalleeEdge(
const ContextEdge *Edge);
493 void eraseCallerEdge(
const ContextEdge *Edge);
495 void setCall(CallInfo
C) {
Call = std::move(
C); }
497 bool hasCall()
const {
return (
bool)
Call.call(); }
503 bool isRemoved()
const {
539 bool IsBackedge =
false;
546 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
547 ContextIds(std::move(ContextIds)) {}
553 inline void clear() {
563 inline bool isRemoved()
const {
564 if (Callee || Caller)
585 void removeNoneTypeCalleeEdges(ContextNode *
Node);
586 void removeNoneTypeCallerEdges(ContextNode *
Node);
588 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
594 template <
class NodeT,
class IteratorT>
595 std::vector<uint64_t>
600 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
603 template <
class NodeT,
class IteratorT>
604 void addStackNodesForMIB(
608 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
613 void updateStackNodes();
622 void fixupImportantContexts();
626 void handleCallsitesWithMultipleTargets();
629 void markBackedges();
639 bool partitionCallsByCallee(
641 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
648 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
655 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
660 struct CallContextInfo {
664 std::vector<uint64_t> StackIds;
678 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
679 bool CalleeIter =
true);
687 void assignStackNodesPostOrder(
701 void propagateDuplicateContextIds(
707 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
715 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
725 calleesMatch(CallTy
Call, EdgeIter &EI,
730 const FuncTy *getCalleeFunc(CallTy
Call) {
731 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
737 bool calleeMatchesFunc(
738 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
739 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
740 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
741 Call, Func, CallerFunc, FoundCalleeChain);
745 bool sameCallee(CallTy Call1, CallTy Call2) {
746 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
751 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
752 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
758 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
764 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
769 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
774 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
775 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
781 FuncInfo cloneFunctionForCallsite(
783 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
784 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
785 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
790 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
791 unsigned CloneNo)
const {
792 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
796 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
797 CallInfo
C = CallInfo()) {
798 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
799 auto *NewNode = NodeOwner.back().get();
801 NodeToCallingFunc[NewNode] =
F;
802 NewNode->NodeId = NodeOwner.size();
807 ContextNode *getNodeForInst(
const CallInfo &
C);
808 ContextNode *getNodeForAlloc(
const CallInfo &
C);
809 ContextNode *getNodeForStackId(
uint64_t StackId);
831 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
838 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
839 ContextNode *NewCallee,
840 bool NewClone =
false,
848 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
849 ContextNode *NewCaller);
860 void mergeNodeCalleeClones(
865 void findOtherCallersToShareMerge(
866 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
894 struct ImportantContextInfo {
896 std::vector<uint64_t> StackIds;
899 unsigned MaxLength = 0;
903 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
912 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
926 auto Size = StackIds.size();
927 for (
auto Id : Ids) {
928 auto &Entry = ImportantContextIdInfo[Id];
929 Entry.StackIdsToNode[StackIds] =
Node;
931 if (
Size > Entry.MaxLength)
932 Entry.MaxLength =
Size;
941 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
947 unsigned int LastContextId = 0;
950template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
952 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
953template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
955 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
956template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
958 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
959template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
961 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
964class ModuleCallsiteContextGraph
965 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
968 ModuleCallsiteContextGraph(
970 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
973 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
976 uint64_t getStackId(uint64_t IdOrIndex)
const;
978 bool calleeMatchesFunc(
979 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
980 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
981 bool sameCallee(Instruction *Call1, Instruction *Call2);
982 bool findProfiledCalleeThroughTailCalls(
983 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
984 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
985 bool &FoundMultipleCalleeChains);
986 uint64_t getLastStackId(Instruction *
Call);
987 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
990 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
991 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
993 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
994 DenseMap<CallInfo, CallInfo> &CallMap,
995 std::vector<CallInfo> &CallsWithMetadataInFunc,
997 std::string getLabel(
const Function *Func,
const Instruction *
Call,
998 unsigned CloneNo)
const;
1001 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1007struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1008 IndexCall() : PointerUnion() {}
1009 IndexCall(std::nullptr_t) : IndexCall() {}
1010 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1011 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1012 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1014 IndexCall *operator->() {
return this; }
1016 void print(raw_ostream &OS)
const {
1017 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1042class IndexCallsiteContextGraph
1043 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1046 IndexCallsiteContextGraph(
1047 ModuleSummaryIndex &Index,
1051 ~IndexCallsiteContextGraph() {
1056 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1058 for (
auto &Callsite :
I.second)
1059 FS->addCallsite(*Callsite.second);
1064 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1067 uint64_t getStackId(uint64_t IdOrIndex)
const;
1068 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1069 bool calleeMatchesFunc(
1070 IndexCall &
Call,
const FunctionSummary *Func,
1071 const FunctionSummary *CallerFunc,
1072 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1073 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1074 bool findProfiledCalleeThroughTailCalls(
1075 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1076 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1077 bool &FoundMultipleCalleeChains);
1078 uint64_t getLastStackId(IndexCall &
Call);
1079 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1082 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1083 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1084 IndexCall>::FuncInfo
1085 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1086 DenseMap<CallInfo, CallInfo> &CallMap,
1087 std::vector<CallInfo> &CallsWithMetadataInFunc,
1089 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1090 unsigned CloneNo)
const;
1091 DenseSet<GlobalValue::GUID> findAliaseeGUIDsPrevailingInDifferentModule();
1095 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1097 const ModuleSummaryIndex &Index;
1105 std::unordered_map<FunctionSummary *,
1106 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1107 FunctionCalleesToSynthesizedCallsiteInfos;
1118 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1121 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1142template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1143bool allocTypesMatch(
1144 const std::vector<uint8_t> &InAllocTypes,
1145 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1149 assert(InAllocTypes.size() == Edges.size());
1151 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1153 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1157 if (l == (uint8_t)AllocationType::None ||
1158 r->AllocTypes == (uint8_t)AllocationType::None)
1160 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1169template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1170bool allocTypesMatchClone(
1171 const std::vector<uint8_t> &InAllocTypes,
1172 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1173 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1177 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1181 for (
const auto &
E : Clone->CalleeEdges) {
1183 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1187 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1188 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1190 if (Iter == EdgeCalleeMap.
end())
1198 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1206template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1207typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1208CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1209 const CallInfo &
C) {
1210 ContextNode *
Node = getNodeForAlloc(
C);
1214 return NonAllocationCallToContextNodeMap.lookup(
C);
1217template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1218typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1219CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1220 const CallInfo &
C) {
1221 return AllocationCallToContextNodeMap.lookup(
C);
1224template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1225typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1226CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1228 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1229 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1230 return StackEntryNode->second;
1234template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1235void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1237 unsigned int ContextId) {
1238 for (
auto &
Edge : CallerEdges) {
1239 if (
Edge->Caller == Caller) {
1241 Edge->getContextIds().insert(ContextId);
1245 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1246 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1247 CallerEdges.push_back(
Edge);
1251template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1252void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1253 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1269 auto CalleeCallerCount =
Callee->CallerEdges.size();
1270 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1275 }
else if (CalleeIter) {
1277 *EI =
Caller->CalleeEdges.erase(*EI);
1280 *EI =
Callee->CallerEdges.erase(*EI);
1282 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1283 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1286template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1287void CallsiteContextGraph<
1288 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1289 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1291 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1293 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1299template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1300void CallsiteContextGraph<
1301 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1302 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1304 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1306 Edge->Caller->eraseCalleeEdge(
Edge.get());
1307 EI =
Node->CallerEdges.erase(EI);
1313template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1314typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1315CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1316 findEdgeFromCallee(
const ContextNode *Callee) {
1317 for (
const auto &
Edge : CalleeEdges)
1318 if (
Edge->Callee == Callee)
1323template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1324typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1325CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1326 findEdgeFromCaller(
const ContextNode *Caller) {
1327 for (
const auto &
Edge : CallerEdges)
1328 if (
Edge->Caller == Caller)
1333template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1334void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1335 eraseCalleeEdge(
const ContextEdge *
Edge) {
1337 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1338 return CalleeEdge.get() ==
Edge;
1340 assert(EI != CalleeEdges.end());
1341 CalleeEdges.erase(EI);
1344template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1345void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1346 eraseCallerEdge(
const ContextEdge *
Edge) {
1348 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1349 return CallerEdge.get() ==
Edge;
1351 assert(EI != CallerEdges.end());
1352 CallerEdges.erase(EI);
1355template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1356uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1357 DenseSet<uint32_t> &ContextIds)
const {
1359 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1360 uint8_t
AllocType = (uint8_t)AllocationType::None;
1361 for (
auto Id : ContextIds) {
1362 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1370template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1372CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1373 const DenseSet<uint32_t> &Node1Ids,
1374 const DenseSet<uint32_t> &Node2Ids)
const {
1376 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1377 uint8_t
AllocType = (uint8_t)AllocationType::None;
1378 for (
auto Id : Node1Ids) {
1379 if (!Node2Ids.
count(Id))
1381 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1389template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1390uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1391 const DenseSet<uint32_t> &Node1Ids,
1392 const DenseSet<uint32_t> &Node2Ids)
const {
1393 if (Node1Ids.
size() < Node2Ids.
size())
1394 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1396 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1399template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1400typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1401CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1402 CallInfo
Call,
const FuncTy *
F) {
1404 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1405 AllocationCallToContextNodeMap[
Call] = AllocNode;
1407 AllocNode->OrigStackOrAllocId = LastContextId;
1410 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1426template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1427template <
class NodeT,
class IteratorT>
1428void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1429 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1432 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1438 ContextIdToAllocationType[++LastContextId] =
AllocType;
1440 bool IsImportant =
false;
1441 if (!ContextSizeInfo.
empty()) {
1442 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1446 uint64_t TotalCold = 0;
1447 for (
auto &CSI : ContextSizeInfo)
1448 TotalCold += CSI.TotalSize;
1454 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1457 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1458 TotalSizeToContextIdTopNCold.erase(
1459 TotalSizeToContextIdTopNCold.begin());
1460 assert(ImportantContextIdInfo.count(IdToRemove));
1461 ImportantContextIdInfo.erase(IdToRemove);
1463 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1467 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1471 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1476 ContextNode *PrevNode = AllocNode;
1480 SmallSet<uint64_t, 8> StackIdSet;
1483 ContextIter != StackContext.
end(); ++ContextIter) {
1484 auto StackId = getStackId(*ContextIter);
1486 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1487 ContextNode *StackNode = getNodeForStackId(StackId);
1489 StackNode = createNewNode(
false);
1490 StackEntryIdToContextNodeMap[StackId] = StackNode;
1491 StackNode->OrigStackOrAllocId = StackId;
1496 auto Ins = StackIdSet.
insert(StackId);
1498 StackNode->Recursive =
true;
1500 StackNode->AllocTypes |= (uint8_t)
AllocType;
1501 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1502 PrevNode = StackNode;
1506template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1508CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1509 const DenseSet<uint32_t> &StackSequenceContextIds,
1510 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1511 DenseSet<uint32_t> NewContextIds;
1512 for (
auto OldId : StackSequenceContextIds) {
1513 NewContextIds.
insert(++LastContextId);
1514 OldToNewContextIds[OldId].insert(LastContextId);
1515 assert(ContextIdToAllocationType.count(OldId));
1517 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1518 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1519 if (CSI != ContextIdToContextSizeInfos.end())
1520 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1521 if (DotAllocContextIds.
contains(OldId))
1522 DotAllocContextIds.
insert(LastContextId);
1524 return NewContextIds;
1527template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1528void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1529 propagateDuplicateContextIds(
1530 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1532 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1533 DenseSet<uint32_t> NewIds;
1534 for (
auto Id : ContextIds)
1535 if (
auto NewId = OldToNewContextIds.find(Id);
1536 NewId != OldToNewContextIds.end())
1542 auto UpdateCallers = [&](ContextNode *
Node,
1543 DenseSet<const ContextEdge *> &Visited,
1544 auto &&UpdateCallers) ->
void {
1545 for (
const auto &
Edge :
Node->CallerEdges) {
1549 ContextNode *NextNode =
Edge->Caller;
1550 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1553 if (!NewIdsToAdd.
empty()) {
1554 Edge->getContextIds().insert_range(NewIdsToAdd);
1555 UpdateCallers(NextNode, Visited, UpdateCallers);
1560 DenseSet<const ContextEdge *> Visited;
1561 for (
auto &Entry : AllocationCallToContextNodeMap) {
1563 UpdateCallers(Node, Visited, UpdateCallers);
1567template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1568void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1569 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1572 DenseSet<uint32_t> RemainingContextIds) {
1574 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1575 DenseSet<uint32_t> RecursiveContextIds;
1576 DenseSet<uint32_t> AllCallerContextIds;
1581 for (
auto &CE : OrigEdges) {
1582 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1583 for (
auto Id :
CE->getContextIds())
1584 if (!AllCallerContextIds.
insert(Id).second)
1585 RecursiveContextIds.
insert(Id);
1589 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1591 DenseSet<uint32_t> NewEdgeContextIds;
1592 DenseSet<uint32_t> NotFoundContextIds;
1596 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1597 NotFoundContextIds);
1600 if (RecursiveContextIds.
empty()) {
1603 RemainingContextIds.
swap(NotFoundContextIds);
1613 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1615 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1618 if (NewEdgeContextIds.
empty()) {
1622 if (TowardsCallee) {
1623 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1624 auto NewEdge = std::make_shared<ContextEdge>(
1625 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1626 NewNode->CalleeEdges.push_back(NewEdge);
1627 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1629 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1630 auto NewEdge = std::make_shared<ContextEdge>(
1631 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1632 NewNode->CallerEdges.push_back(NewEdge);
1633 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1636 if (
Edge->getContextIds().empty()) {
1637 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1644template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1646 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1650 assert(!Edge->ContextIds.empty());
1653template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1655 bool CheckEdges =
true) {
1656 if (
Node->isRemoved())
1660 auto NodeContextIds =
Node->getContextIds();
1664 if (
Node->CallerEdges.size()) {
1666 Node->CallerEdges.front()->ContextIds);
1670 set_union(CallerEdgeContextIds, Edge->ContextIds);
1677 NodeContextIds == CallerEdgeContextIds ||
1680 if (
Node->CalleeEdges.size()) {
1682 Node->CalleeEdges.front()->ContextIds);
1686 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1692 NodeContextIds == CalleeEdgeContextIds);
1701 for (
const auto &
E :
Node->CalleeEdges)
1707template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1708void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1709 assignStackNodesPostOrder(ContextNode *Node,
1710 DenseSet<const ContextNode *> &Visited,
1711 DenseMap<uint64_t, std::vector<CallContextInfo>>
1712 &StackIdToMatchingCalls,
1713 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1714 const DenseSet<uint32_t> &ImportantContextIds) {
1722 auto CallerEdges =
Node->CallerEdges;
1723 for (
auto &
Edge : CallerEdges) {
1725 if (
Edge->isRemoved()) {
1729 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1730 CallToMatchingCall, ImportantContextIds);
1739 if (
Node->IsAllocation ||
1740 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1743 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1747 if (Calls.size() == 1) {
1748 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1749 if (Ids.size() == 1) {
1750 assert(SavedContextIds.empty());
1752 assert(Node == getNodeForStackId(Ids[0]));
1753 if (
Node->Recursive)
1756 NonAllocationCallToContextNodeMap[
Call] =
Node;
1758 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1766 uint64_t LastId =
Node->OrigStackOrAllocId;
1767 ContextNode *LastNode = getNodeForStackId(LastId);
1770 assert(LastNode == Node);
1772 ContextNode *LastNode =
Node;
1777 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1779 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1780 bool CreatedNode =
false;
1781 for (
unsigned I = 0;
I < Calls.size();
1782 I++, PrevIterCreatedNode = CreatedNode) {
1783 CreatedNode =
false;
1784 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1787 if (SavedContextIds.empty()) {
1794 auto MatchingCall = CallToMatchingCall[
Call];
1795 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1799 assert(
I > 0 && !PrevIterCreatedNode);
1802 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1807 assert(LastId == Ids.back());
1816 ContextNode *PrevNode = LastNode;
1820 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1822 ContextNode *CurNode = getNodeForStackId(Id);
1826 assert(!CurNode->Recursive);
1828 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1840 if (SavedContextIds.empty()) {
1849 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1850 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1852 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1854 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1860 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1865 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1870 for (
auto Id : Ids) {
1871 ContextNode *CurNode = getNodeForStackId(Id);
1878 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1885 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1886 if (PrevEdge->getContextIds().empty())
1887 removeEdgeFromGraph(PrevEdge);
1892 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1893 ? (uint8_t)AllocationType::None
1894 : CurNode->computeAllocType();
1898 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1902 for (
auto Id : Ids) {
1903 ContextNode *CurNode = getNodeForStackId(Id);
// Post-pass over "important" contexts recorded in ImportantContextIdInfo:
// re-walks each context's saved stack-id sequence, locates the matching
// ContextNode for each suffix, and ensures the context id survives on the
// edge between consecutive nodes — inserting the id into an existing edge or
// synthesizing a new edge, then erasing the id from the other caller edges.
// NOTE(review): this listing is an elided extraction; original lines are
// missing between the numbered fragments, so control flow shown is partial.
1912template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1913void CallsiteContextGraph<DerivedCCG, FuncTy,
1914 CallTy>::fixupImportantContexts() {
// Nothing to do if no contexts were flagged as important.
1915 if (ImportantContextIdInfo.empty())
1919 NumImportantContextIds = ImportantContextIdInfo.size();
1925 exportToDot(
"beforestackfixup");
// Walk each important context and its recorded stack-id -> node mapping.
1950 for (
auto &[CurContextId, Info] : ImportantContextIdInfo) {
1951 if (
Info.StackIdsToNode.empty())
1954 ContextNode *PrevNode =
nullptr;
1955 ContextNode *CurNode =
nullptr;
1956 DenseSet<const ContextEdge *> VisitedEdges;
1957 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
// Advance through the full stack-id sequence, carrying the previously
// matched node so consecutive matches can be connected by an edge.
1960 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1964 auto LenToEnd = AllStackIds.size() -
I;
// Look up the node recorded for this slice of the stack-id sequence.
1972 auto CheckStackIds = AllStackIds.slice(
I, Len);
1973 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1974 if (EntryIt ==
Info.StackIdsToNode.end())
1976 CurNode = EntryIt->second;
// Ensure the context id is on the PrevNode<-CurNode caller edge.
1993 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1996 if (CurEdge->getContextIds().insert(CurContextId).second) {
1997 NumFixupEdgeIdsInserted++;
// No existing edge: create one carrying just this context id.
2002 NumFixupEdgesAdded++;
2003 DenseSet<uint32_t> ContextIds({CurContextId});
2004 auto AllocType = computeAllocType(ContextIds);
2005 auto NewEdge = std::make_shared<ContextEdge>(
2006 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2007 PrevNode->CallerEdges.push_back(NewEdge);
2008 CurNode->CalleeEdges.push_back(NewEdge);
2010 CurEdge = NewEdge.get();
// Track which edge legitimately carries this context id ...
2013 VisitedEdges.
insert(CurEdge);
// ... and strip the id from the other caller edges of PrevNode
// (elided guard presumably skips VisitedEdges — TODO confirm).
2016 for (
auto &
Edge : PrevNode->CallerEdges) {
2020 Edge->getContextIds().erase(CurContextId);
// Matches profiled-callsite calls (which may cover several stack frames) to
// the per-stack-id context nodes, computing for each call the set of context
// ids it covers, duplicating context ids when the same stack sequence appears
// in multiple functions, and finally assigning stack nodes post-order from
// each allocation node. NOTE(review): elided extraction — original lines are
// missing between numbered fragments; comments describe only visible code.
2028template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2029void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// Bucket each call with metadata by the last (outermost visible) stack id
// for which a context node exists.
2037 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2038 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2039 for (
auto &
Call : CallsWithMetadata) {
// Allocation calls are handled elsewhere; skip them here.
2041 if (AllocationCallToContextNodeMap.count(
Call))
2043 auto StackIdsWithContextNodes =
2044 getStackIdsWithContextNodesForCall(
Call.call());
2047 if (StackIdsWithContextNodes.empty())
2051 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2052 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
// Maps old context ids to their duplicates, and duplicate calls to the
// call whose node they should share.
2062 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2066 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2067 for (
auto &It : StackIdToMatchingCalls) {
2068 auto &Calls = It.getSecond();
// Simple single-call/single-frame case needs no saved context ids.
2070 if (Calls.size() == 1) {
2071 auto &Ids = Calls[0].StackIds;
2072 if (Ids.size() == 1)
// Sort calls so longer stack sequences come first, ties broken by the
// sequence itself and then by original (function) insertion order.
2085 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2086 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2087 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2090 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2091 return A.StackIds.size() >
B.StackIds.size() ||
2092 (
A.StackIds.size() ==
B.StackIds.size() &&
2093 (
A.StackIds <
B.StackIds ||
2094 (
A.StackIds ==
B.StackIds &&
2095 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2101 uint64_t LastId = It.getFirst();
2102 ContextNode *LastNode = getNodeForStackId(LastId);
// Recursive last nodes are not handled here.
2106 if (LastNode->Recursive)
2111 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2119 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2122 for (
unsigned I = 0;
I < Calls.size();
I++) {
2123 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2124 assert(SavedContextIds.empty());
2125 assert(LastId == Ids.back());
// Reset per-sequence function tracking when the sequence changes.
2130 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2131 MatchingIdsFuncSet.
clear();
// Intersect context ids along the node chain for this stack sequence,
// walking from the last id backwards.
2138 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2140 ContextNode *PrevNode = LastNode;
2141 ContextNode *CurNode = LastNode;
2146 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2148 CurNode = getNodeForStackId(Id);
2152 if (CurNode->Recursive) {
2157 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2178 if (StackSequenceContextIds.
empty()) {
// If this call is an inlined prefix (its own last frame differs),
// remove ids that belong to contexts continuing past LastNode.
2191 if (Ids.back() != getLastStackId(
Call)) {
2192 for (
const auto &PE : LastNode->CallerEdges) {
2193 set_subtract(StackSequenceContextIds, PE->getContextIds());
2194 if (StackSequenceContextIds.
empty())
2198 if (StackSequenceContextIds.
empty())
2210 MatchingIdsFuncSet.
insert(Func);
// Detect a later call in a different function with the same sequence:
// its contexts must get duplicated ids, and it maps to this call.
2217 bool DuplicateContextIds =
false;
2218 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2219 auto &CallCtxInfo = Calls[J];
2220 auto &NextIds = CallCtxInfo.StackIds;
2223 auto *NextFunc = CallCtxInfo.Func;
2224 if (NextFunc != Func) {
2227 DuplicateContextIds =
true;
2230 auto &NextCall = CallCtxInfo.Call;
2231 CallToMatchingCall[NextCall] =
Call;
2242 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2243 StackSequenceContextIds.
size());
// Save either duplicated ids (shared sequence) or the originals.
2246 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2247 : StackSequenceContextIds;
2248 assert(!SavedContextIds.empty());
2250 if (!DuplicateContextIds) {
// Claimed ids can no longer match later (shorter) sequences.
2254 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2255 if (LastNodeContextIds.
empty())
// Push duplicated ids up through the graph before node assignment.
2262 propagateDuplicateContextIds(OldToNewContextIds);
// Assign stack nodes bottom-up from each allocation, then repair edges
// for the important contexts.
2272 DenseSet<const ContextNode *> Visited;
2274 ImportantContextIdInfo.keys());
2275 for (
auto &Entry : AllocationCallToContextNodeMap)
2276 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2277 CallToMatchingCall, ImportantContextIds);
2279 fixupImportantContexts();
// Returns the last (deepest visible) stack id from the instruction's
// callsite metadata. (Elided extraction: the CallStack constructor argument
// is missing from this listing.)
2285uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2286 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2288 return CallsiteContext.
back();
// Index (ThinLTO summary) variant: the summary stores stack-id *indices*, so
// the last index is translated through the index's stack-id table.
2291uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2293 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2296 return Index.getStackIdAtIndex(CallsiteContext.
back());
// Fragment (enclosing function header elided by extraction): parses a
// numeric clone suffix from a function name of the form "name.<N>" —
// everything after the last '.' is converted to an integer clone number.
// NOTE(review): Err presumably signals a malformed suffix — confirm use in
// the elided surrounding code.
2318 auto Pos =
F.getName().find_last_of(
'.');
2321 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
// Builds a human-readable label for a call (used by debug printing / DOT
// export). Body elided from this extraction beyond the signature.
2327std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2328 const Instruction *
Call,
2329 unsigned CloneNo)
const {
// Summary-index variant of getLabel: maps the FunctionSummary back to its
// ValueInfo via FSToVIMap and labels either "caller -> alloc" for an
// allocation or "caller -> <clone target>" for a callsite record.
2335std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2336 const IndexCall &
Call,
2337 unsigned CloneNo)
const {
2338 auto VI = FSToVIMap.find(Func);
2339 assert(VI != FSToVIMap.end());
// Allocation records have no callee; label as an alloc.
2342 return CallerName +
" -> alloc";
// Callsite records name the clone's target for the requested clone number.
2345 return CallerName +
" -> " +
2347 Callsite->Clones[CloneNo]);
// Returns the stack ids from this instruction's callsite metadata for which
// context nodes exist, by delegating to the templated helper.
2351std::vector<uint64_t>
2352ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2353 Instruction *
Call) {
2354 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2356 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
// Summary-index variant: iterates the CallsiteInfo's stack-id indices via
// the templated helper (indices are translated inside getStackId).
2360std::vector<uint64_t>
2361IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2363 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2365 return getStackIdsWithContextNodes<CallsiteInfo,
2366 SmallVector<unsigned>::const_iterator>(
// Walks a callsite context and collects, in order, each stack id that has a
// ContextNode in the graph. (Elided extraction: the early-termination branch
// when no node exists is missing from this listing.)
2370template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2371template <
class NodeT,
class IteratorT>
2372std::vector<uint64_t>
2373CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2374 CallStack<NodeT, IteratorT> &CallsiteContext) {
2375 std::vector<uint64_t> StackIds;
2376 for (
auto IdOrIndex : CallsiteContext) {
// getStackId resolves summary-index indices to real ids when needed.
2377 auto StackId = getStackId(IdOrIndex);
2378 ContextNode *
Node = getNodeForStackId(StackId);
2381 StackIds.push_back(StackId);
// Builds the callsite context graph from IR: scans every instruction for
// !memprof (allocation MIBs) and !callsite metadata, creates allocation and
// stack nodes, then fixes up stack chains and multi-target callsites.
// NOTE(review): elided extraction — lines are missing between the numbered
// fragments (e.g. the outer function loop header and several conditions).
2386ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2388 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2389 :
Mod(
M), OREGetter(OREGetter) {
// Tracks the largest cold context sizes seen, for top-N reporting.
2393 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2395 std::vector<CallInfo> CallsWithMetadata;
2396 for (
auto &BB :
F) {
2397 for (
auto &
I : BB) {
// Allocation with memprof MIB metadata: create an allocation node
// and add stack nodes for each MIB context.
2400 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2401 CallsWithMetadata.push_back(&
I);
2402 auto *AllocNode = addAllocNode(&
I, &
F);
2403 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2407 for (
auto &MDOp : MemProfMD->operands()) {
// Optional per-context size info follows the first two MIB
// operands (stack node, alloc type).
2409 std::vector<ContextTotalSize> ContextSizeInfo;
2411 if (MIBMD->getNumOperands() > 2) {
2412 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2413 MDNode *ContextSizePair =
2422 ContextSizeInfo.push_back({FullStackId, TotalSize});
2428 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2429 AllocNode, StackContext, CallsiteContext,
2431 TotalSizeToContextIdTopNCold);
2436 DotAllocContextIds = AllocNode->getContextIds();
// Metadata is consumed; strip it from the allocation.
2440 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2441 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
// Plain callsite metadata (non-allocation): record for later
// stack-node matching.
2444 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2445 CallsWithMetadata.push_back(&
I);
2449 if (!CallsWithMetadata.empty())
2450 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2454 dbgs() <<
"CCG before updating call stack chains:\n";
2459 exportToDot(
"prestackupdate");
2464 exportToDot(
"poststackupdate");
2466 handleCallsitesWithMultipleTargets();
// All callsite metadata has been incorporated into the graph; drop it.
2471 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2472 for (
auto &
Call : FuncEntry.second)
2473 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// Collects GUIDs of aliasees whose prevailing copy lives in a different
// module than a prevailing alias referencing them: for each prevailing alias
// summary, if the aliasee itself is not prevailing in that copy, record its
// GUID. Also bumps the corresponding statistic. (Return-type line elided.)
2479IndexCallsiteContextGraph::findAliaseeGUIDsPrevailingInDifferentModule() {
2480 DenseSet<GlobalValue::GUID> AliaseeGUIDs;
2481 for (
auto &
I : Index) {
2483 for (
auto &S :
VI.getSummaryList()) {
2488 auto *AliaseeSummary = &AS->getAliasee();
// Only prevailing alias copies are of interest.
2496 !isPrevailing(
VI.getGUID(), S.get()))
2501 auto AliaseeGUID = AS->getAliaseeGUID();
// Aliasee not prevailing here => its prevailing copy is elsewhere.
2503 if (!isPrevailing(AliaseeGUID, AliaseeSummary))
2504 AliaseeGUIDs.
insert(AliaseeGUID);
2507 AliaseesPrevailingInDiffModuleFromAlias += AliaseeGUIDs.
size();
2508 return AliaseeGUIDs;
// Builds the callsite context graph from the ThinLTO combined summary:
// mirrors the Module constructor, but sources allocation MIBs and callsite
// records from FunctionSummary allocs()/callsites() instead of IR metadata.
// NOTE(review): elided extraction — lines are missing between the numbered
// fragments (several guards and loop headers are not shown).
2511IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2512 ModuleSummaryIndex &Index,
2522 findAliaseeGUIDsPrevailingInDifferentModule();
2526 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2527 for (
auto &
I : Index) {
2528 auto VI = Index.getValueInfo(
I);
// Skip GUIDs excluded up front (e.g. from the aliasee analysis above —
// TODO confirm against elided setup of GUIDsToSkip).
2529 if (GUIDsToSkip.
contains(VI.getGUID()))
2531 for (
auto &S : VI.getSummaryList()) {
// Only prevailing summaries contribute to the graph.
2540 !isPrevailing(VI.getGUID(), S.get()))
2545 std::vector<CallInfo> CallsWithMetadata;
2546 if (!
FS->allocs().empty()) {
2547 for (
auto &AN :
FS->mutableAllocs()) {
2552 if (AN.MIBs.empty())
2554 IndexCall AllocCall(&AN);
2555 CallsWithMetadata.push_back(AllocCall);
2556 auto *AllocNode = addAllocNode(AllocCall, FS);
// Context size info, when recorded, parallels the MIB list.
2564 AN.ContextSizeInfos.size() == AN.MIBs.size());
2566 for (
auto &MIB : AN.MIBs) {
2569 std::vector<ContextTotalSize> ContextSizeInfo;
2570 if (!AN.ContextSizeInfos.empty()) {
2571 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2572 ContextSizeInfo.push_back({FullStackId, TotalSize});
2574 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2575 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2576 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2582 DotAllocContextIds = AllocNode->getContextIds();
// Initialize the single (original) version's allocation type.
2588 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2592 if (!
FS->callsites().empty())
2593 for (
auto &SN :
FS->mutableCallsites()) {
2594 IndexCall StackNodeCall(&SN);
2595 CallsWithMetadata.push_back(StackNodeCall);
2598 if (!CallsWithMetadata.empty())
2599 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2601 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2607 dbgs() <<
"CCG before updating call stack chains:\n";
2612 exportToDot(
"prestackupdate");
2617 exportToDot(
"poststackupdate");
2619 handleCallsitesWithMultipleTargets();
// Resolves context nodes whose profiled callsite may correspond to several
// IR calls / callees: partitions matching calls by callee where possible,
// otherwise finds the first call whose callees match the node's callee
// edges, and prunes nodes/edges that cannot be matched.
// NOTE(review): elided extraction; several loop bodies and conditions are
// missing between the numbered fragments.
2624template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2625void CallsiteContextGraph<DerivedCCG, FuncTy,
2626 CallTy>::handleCallsitesWithMultipleTargets() {
2641 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2642 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2643 auto *
Node = Entry.second;
// Consider the node's primary call plus all recorded matching calls.
2652 std::vector<CallInfo> AllCalls;
2653 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2654 AllCalls.push_back(
Node->Call);
// If calls split cleanly by callee, partitioning handled this node.
2668 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
// Otherwise find the first call whose callees match all callee edges.
2671 auto It = AllCalls.begin();
2673 for (; It != AllCalls.end(); ++It) {
2676 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2679 if (!Edge->Callee->hasCall())
2681 assert(NodeToCallingFunc.count(Edge->Callee));
2683 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
// Promote the first matching call to be the node's primary call.
2692 if (
Node->Call != ThisCall) {
2693 Node->setCall(ThisCall);
2704 Node->MatchingCalls.clear();
// No call matched: clear the node's call and count the removal.
2707 if (It == AllCalls.end()) {
2708 RemovedEdgesWithMismatchedCallees++;
2712 Node->setCall(CallInfo());
// Remaining calls become the node's matching calls.
2717 for (++It; It != AllCalls.end(); ++It) {
2721 Node->MatchingCalls.push_back(ThisCall);
// Drop stale map entries, then install entries for partitioned and
// tail-call-synthesized nodes.
2730 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2731 return !it.second->hasCall() || it.second->Call != it.first;
2735 for (
auto &[
Call,
Node] : NewCallToNode)
2736 NonAllocationCallToContextNodeMap[
Call] =
Node;
2740 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2741 NonAllocationCallToContextNodeMap[
Call] =
Node;
// Attempts to split a multi-call node by callee function: groups the calls
// by their IR callee, pairs groups with the node's callee edges via the
// profiled callee, and moves each callee edge to a (possibly new) node for
// its group; unmatched callees get a shared fallback node.
// NOTE(review): elided extraction; some guards/returns are missing between
// the numbered fragments.
2744template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2745bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2747 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
// Per-callee bucket: the calls plus the node they will be assigned to.
2751 struct CallsWithSameCallee {
2752 std::vector<CallInfo> Calls;
2753 ContextNode *
Node =
nullptr;
2759 for (
auto ThisCall : AllCalls) {
2760 auto *
F = getCalleeFunc(
ThisCall.call());
2762 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
// Map each callee edge's node to its call bucket via the profiled callee.
2771 for (
const auto &Edge :
Node->CalleeEdges) {
2772 if (!Edge->Callee->hasCall())
2774 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2775 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2776 CalleeNodeToCallInfo[Edge->Callee] =
2777 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
// Nothing matched: partitioning is not applicable.
2783 if (CalleeNodeToCallInfo.
empty())
// Fallback node for callee edges whose callee matched no call.
2795 ContextNode *UnmatchedCalleesNode =
nullptr;
// The original node is reused for the first matched bucket.
2797 bool UsedOrigNode =
false;
// Copy edges since moveCalleeEdgeToNewCaller mutates Node->CalleeEdges.
2802 auto CalleeEdges =
Node->CalleeEdges;
2803 for (
auto &Edge : CalleeEdges) {
2804 if (!Edge->Callee->hasCall())
2809 ContextNode *CallerNodeToUse =
nullptr;
2813 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2814 if (!UnmatchedCalleesNode)
2815 UnmatchedCalleesNode =
2816 createNewNode(
false, NodeToCallingFunc[
Node]);
2817 CallerNodeToUse = UnmatchedCalleesNode;
2821 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2824 if (!UsedOrigNode) {
2827 Node->MatchingCalls.clear();
2828 UsedOrigNode =
true;
2831 createNewNode(
false, NodeToCallingFunc[
Node]);
2832 assert(!Info->Calls.empty());
2835 Info->Node->setCall(Info->Calls.front());
// Record the new call->node mapping for the caller to install later.
2841 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2843 CallerNodeToUse = Info->Node;
2847 if (CallerNodeToUse ==
Node)
2850 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
// Clean up any None-type caller edges left behind by the moves.
2857 for (
auto &
I : CalleeNodeToCallInfo)
2858 removeNoneTypeCallerEdges(
I.second->Node);
2859 if (UnmatchedCalleesNode)
2860 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2861 removeNoneTypeCallerEdges(
Node);
// In the summary index, stack "ids" held on callsites are indices into the
// index's stack-id table; translate to the real id.
2871uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2874 return Index.getStackIdAtIndex(IdOrIndex);
// Checks whether the IR call's callee matches the profiled callee of the
// edge at EI; on a tail-call-mediated match, splices synthesized nodes for
// the discovered tail-call chain into the graph (reusing existing edges and
// keeping the iterator EI valid), then removes the original mismatched edge.
// NOTE(review): elided extraction; the AddEdge helper definition and several
// declarations (Edge, Callee, Caller) are missing from this listing.
2877template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2878bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2879 CallTy
Call, EdgeIter &EI,
2880 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2882 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2883 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
// FoundCalleeChain receives the tail-call hops (call, containing func).
2886 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2887 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
// Direct match (no tail calls): nothing to rewrite.
2892 if (FoundCalleeChain.empty())
// Helper behavior (elided lambda): merge into an existing edge if one
// exists, otherwise create a new edge between Callee and Caller.
2896 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2900 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2901 CurEdge->AllocTypes |=
Edge->AllocTypes;
2906 auto NewEdge = std::make_shared<ContextEdge>(
2907 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2908 Callee->CallerEdges.push_back(NewEdge);
// Inserting into the caller's CalleeEdges at EI keeps iteration sane
// for the caller of this function.
2909 if (Caller ==
Edge->Caller) {
2913 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2916 "Iterator position not restored after insert and increment");
2918 Caller->CalleeEdges.push_back(NewEdge);
// Create (or reuse) a node per tail call, chaining callee -> caller.
2923 auto *CurCalleeNode =
Edge->Callee;
2924 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2925 ContextNode *NewNode =
nullptr;
2927 if (TailCallToContextNodeMap.
count(NewCall)) {
2928 NewNode = TailCallToContextNodeMap[NewCall];
2929 NewNode->AllocTypes |=
Edge->AllocTypes;
2931 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2933 NewNode = createNewNode(
false, Func, NewCall);
2934 TailCallToContextNodeMap[NewCall] = NewNode;
2935 NewNode->AllocTypes =
Edge->AllocTypes;
2939 AddEdge(NewNode, CurCalleeNode);
2941 CurCalleeNode = NewNode;
// Finally connect the original caller to the top of the chain and drop
// the now-replaced direct edge.
2945 AddEdge(
Edge->Caller, CurCalleeNode);
2953 removeEdgeFromGraph(
Edge.get(), &EI,
true);
// Recursively searches CurCallee's body for a *unique* chain of tail calls
// reaching ProfiledCallee. Returns true iff exactly one chain is found;
// sets FoundMultipleCalleeChains when ambiguity is detected. Each hop is
// appended to FoundCalleeChain. (Elided: depth limit and several guards.)
2965bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2966 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2967 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2968 bool &FoundMultipleCalleeChains) {
// (Elided lambda SaveCallsiteInfo) records the tail-call hop.
2975 FoundCalleeChain.push_back({Callsite,
F});
2990 bool FoundSingleCalleeChain =
false;
2991 for (
auto &BB : *CalleeFunc) {
2992 for (
auto &
I : BB) {
// Only tail calls can stand in for a missing profiled frame.
2994 if (!CB || !CB->isTailCall())
2996 auto *CalledValue = CB->getCalledOperand();
2997 auto *CalledFunction = CB->getCalledFunction();
// See through bitcasts/aliases on the called operand.
2998 if (CalledValue && !CalledFunction) {
2999 CalledValue = CalledValue->stripPointerCasts();
3006 assert(!CalledFunction &&
3007 "Expected null called function in callsite for alias");
3010 if (!CalledFunction)
// Direct hit on the profiled callee.
3012 if (CalledFunction == ProfiledCallee) {
3013 if (FoundSingleCalleeChain) {
3014 FoundMultipleCalleeChains =
true;
3017 FoundSingleCalleeChain =
true;
3018 FoundProfiledCalleeCount++;
3019 FoundProfiledCalleeDepth +=
Depth;
3020 if (
Depth > FoundProfiledCalleeMaxDepth)
3021 FoundProfiledCalleeMaxDepth =
Depth;
3022 SaveCallsiteInfo(&
I, CalleeFunc);
// Otherwise recurse one level deeper through this tail call.
3023 }
else if (findProfiledCalleeThroughTailCalls(
3024 ProfiledCallee, CalledFunction,
Depth + 1,
3025 FoundCalleeChain, FoundMultipleCalleeChains)) {
3028 assert(!FoundMultipleCalleeChains);
3029 if (FoundSingleCalleeChain) {
3030 FoundMultipleCalleeChains =
true;
3033 FoundSingleCalleeChain =
true;
3034 SaveCallsiteInfo(&
I, CalleeFunc);
3035 }
else if (FoundMultipleCalleeChains)
3040 return FoundSingleCalleeChain;
// Returns the direct callee Function of a call instruction, or (per the
// elided return paths) presumably null for indirect calls — TODO confirm.
3043const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
3045 if (!CB->getCalledOperand() || CB->isIndirectCall())
3047 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
// Returns whether the IR call targets Func (directly or via an alias); if
// not, tries to explain the mismatch via a unique tail-call chain from the
// actual callee to Func, recording the chain in FoundCalleeChain.
3054bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3055 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3056 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
// Indirect calls cannot be matched against a concrete profiled callee.
3058 if (!CB->getCalledOperand() || CB->isIndirectCall())
3060 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3062 if (CalleeFunc == Func)
// An alias whose aliasee is Func also counts as a match.
3065 if (Alias && Alias->getAliasee() == Func)
3076 bool FoundMultipleCalleeChains =
false;
3077 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3079 FoundMultipleCalleeChains)) {
3080 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3081 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3082 <<
" that actually called " << CalleeVal->getName()
3083 << (FoundMultipleCalleeChains
3084 ?
" (found multiple possible chains)"
3087 if (FoundMultipleCalleeChains)
3088 FoundProfiledCalleeNonUniquelyCount++;
// Returns whether two call instructions resolve (through pointer casts) to
// the same direct callee; indirect calls bail out via the elided returns.
3095bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3096 Instruction *Call2) {
3098 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3100 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3103 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3105 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3107 return CalleeFunc1 == CalleeFunc2;
// Summary-index analogue of the Module tail-call search: walks the summary
// call graph along hasTailCall() edges looking for a *unique* chain from
// CurCallee to ProfiledCallee, synthesizing CallsiteInfo records (cached in
// FunctionCalleesToSynthesizedCallsiteInfos) for each hop.
// NOTE(review): elided extraction — depth limit, summary-list loop header,
// and some guards are missing between the numbered fragments.
3110bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3111 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3112 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3113 bool &FoundMultipleCalleeChains) {
// Create (once per (FS, Callee)) a synthesized CallsiteInfo and append it
// to the chain being built.
3119 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3122 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3123 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3126 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3127 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3128 CallsiteInfo *NewCallsiteInfo =
3129 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3130 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3137 bool FoundSingleCalleeChain =
false;
// Only prevailing summaries of the callee are considered.
3140 !isPrevailing(CurCallee.
getGUID(), S.get()))
// Resolve aliases to the aliasee's ValueInfo for FSToVIMap bookkeeping.
3145 auto FSVI = CurCallee;
3148 FSVI = AS->getAliaseeVI();
3149 for (
auto &CallEdge :
FS->calls()) {
3150 if (!CallEdge.second.hasTailCall())
// Direct hit on the profiled callee via a tail call.
3152 if (CallEdge.first == ProfiledCallee) {
3153 if (FoundSingleCalleeChain) {
3154 FoundMultipleCalleeChains =
true;
3157 FoundSingleCalleeChain =
true;
3158 FoundProfiledCalleeCount++;
3159 FoundProfiledCalleeDepth +=
Depth;
3160 if (
Depth > FoundProfiledCalleeMaxDepth)
3161 FoundProfiledCalleeMaxDepth =
Depth;
3162 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3164 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3165 FSToVIMap[
FS] = FSVI;
// Otherwise recurse through this tail-call edge.
3166 }
else if (findProfiledCalleeThroughTailCalls(
3167 ProfiledCallee, CallEdge.first,
Depth + 1,
3168 FoundCalleeChain, FoundMultipleCalleeChains)) {
3171 assert(!FoundMultipleCalleeChains);
3172 if (FoundSingleCalleeChain) {
3173 FoundMultipleCalleeChains =
true;
3176 FoundSingleCalleeChain =
true;
3177 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3179 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3180 FSToVIMap[
FS] = FSVI;
3181 }
else if (FoundMultipleCalleeChains)
3186 return FoundSingleCalleeChain;
// Returns the FunctionSummary for the call's callee ValueInfo; a callee
// with no summaries cannot be resolved (return path elided).
3189const FunctionSummary *
3190IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3192 if (
Callee.getSummaryList().empty())
// Summary-index analogue of calleeMatchesFunc: compares the callsite's
// callee ValueInfo (or its alias) against Func's ValueInfo from FSToVIMap,
// falling back to the unique-tail-call-chain search on mismatch.
3197bool IndexCallsiteContextGraph::calleeMatchesFunc(
3198 IndexCall &
Call,
const FunctionSummary *Func,
3199 const FunctionSummary *CallerFunc,
3200 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
// A callee with no summaries may still be an alias (elided selection).
3204 AliasSummary *Alias =
3205 Callee.getSummaryList().empty()
3208 assert(FSToVIMap.count(Func));
3209 auto FuncVI = FSToVIMap[
Func];
// Direct match on the ValueInfo (alias comparison elided after ||).
3210 if (Callee == FuncVI ||
3225 bool FoundMultipleCalleeChains =
false;
3226 if (!findProfiledCalleeThroughTailCalls(
3227 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3228 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3229 <<
" from " << FSToVIMap[CallerFunc]
3230 <<
" that actually called " << Callee
3231 << (FoundMultipleCalleeChains
3232 ?
" (found multiple possible chains)"
3235 if (FoundMultipleCalleeChains)
3236 FoundProfiledCalleeNonUniquelyCount++;
// Two summary callsites target the same callee iff their callee ValueInfos
// compare equal (Callee1/Callee2 extraction elided).
3243bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3246 return Callee1 == Callee2;
// Debug convenience: dump this node (body elided; presumably print(dbgs())
// — TODO confirm against the full source).
3249template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3250void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// Pretty-prints a node: address, call, matching calls, node id, sorted
// context ids, callee/caller edge lists, and clone relationships.
3256template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3257void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3258 raw_ostream &OS)
const {
3259 OS <<
"Node " <<
this <<
"\n";
3263 OS <<
" (recursive)";
3265 if (!MatchingCalls.empty()) {
3266 OS <<
"\tMatchingCalls:\n";
3267 for (
auto &MatchingCall : MatchingCalls) {
3269 MatchingCall.print(OS);
3273 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3275 OS <<
"\tContextIds:";
// Sort ids for deterministic output.
3277 auto ContextIds = getContextIds();
3278 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3279 std::sort(SortedIds.begin(), SortedIds.end());
3280 for (
auto Id : SortedIds)
3283 OS <<
"\tCalleeEdges:\n";
3284 for (
auto &
Edge : CalleeEdges)
3285 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3287 OS <<
"\tCallerEdges:\n";
3288 for (
auto &
Edge : CallerEdges)
3289 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
// A node lists either its clones or, if it is itself a clone, its origin.
3291 if (!Clones.empty()) {
3294 for (
auto *
C : Clones)
3295 OS <<
LS <<
C <<
" NodeId: " <<
C->NodeId;
3297 }
else if (CloneOf) {
3298 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
// Debug convenience: dump this edge (body elided from this listing).
3302template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3303void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// Pretty-prints an edge: endpoints, backedge marker "(BE)", and its sorted
// context ids.
3309template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3310void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3311 raw_ostream &OS)
const {
3312 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3313 << (IsBackedge ?
" (BE)" :
"")
3315 OS <<
" ContextIds:";
// Sort for deterministic output.
3316 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3317 std::sort(SortedIds.begin(), SortedIds.end());
3318 for (
auto Id : SortedIds)
// Debug convenience: dump the whole graph (body elided from this listing).
3322template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3323void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// Prints every live (non-removed) node of the graph via GraphTraits
// iteration (the node loop header is elided from this listing).
3327template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3328void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3329 raw_ostream &OS)
const {
3330 OS <<
"Callsite Context Graph:\n";
3331 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Skip nodes removed from the graph.
3333 if (
Node->isRemoved())
// Emits a remark per recorded context-size entry of each allocation node:
// full stack id, total size, the allocation type, and whether the decision
// was changed by the cold byte percent threshold.
3340template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3341void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3343 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark)
const {
3344 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Only live allocation nodes carry reportable size info.
3346 if (
Node->isRemoved())
3348 if (!
Node->IsAllocation)
3350 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3351 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
// Sort for deterministic remark ordering.
3352 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3353 std::sort(SortedIds.begin(), SortedIds.end());
3354 for (
auto Id : SortedIds) {
3355 auto TypeI = ContextIdToAllocationType.find(Id);
3356 assert(TypeI != ContextIdToAllocationType.end());
3357 auto CSI = ContextIdToContextSizeInfos.find(Id);
3358 if (CSI != ContextIdToContextSizeInfos.end()) {
3359 for (
auto &Info : CSI->second) {
3362 " full allocation context " + std::to_string(
Info.FullStackId) +
3363 " with total size " + std::to_string(
Info.TotalSize) +
" is " +
// Note when the applied type differs from the context's profile
// type due to the cold byte percent heuristic.
3365 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3367 " due to cold byte percent";
3369 Msg +=
" (context id " + std::to_string(Id) +
")";
3372 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
// Graph invariant checker: iterates nodes and their caller edges (the
// per-node/per-edge verification calls are elided from this listing).
3379template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3380void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3381 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3384 for (
auto &
Edge :
Node->CallerEdges)
// GraphTraits specialization so generic LLVM graph utilities (printing, DOT
// export) can walk the CallsiteContextGraph. Entry node is the first owned
// node; child iteration (elided) maps shared edge pointers to callee nodes.
3389template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3391 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3392 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
// Nodes are owned by unique_ptrs held in the graph's NodeOwner list.
3394 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3410 return G->NodeOwner.begin()->get();
3413 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3414 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
// DOTGraphTraits specialization: renders nodes/edges of the context graph
// for exportToDot, with labels from call info, tooltips carrying context
// ids, colors by allocation type, and optional highlighting.
// NOTE(review): elided extraction — member function signatures between the
// numbered fragments are missing; comments mark each visible section.
3433template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3447 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// --- Node label: orig id / "Alloc" tag, then the call (and matching
// calls) rendered via the graph's getLabel, plus status suffixes.
3453 std::string LabelString =
3454 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3457 LabelString +=
"\n";
3458 if (
Node->hasCall()) {
3459 auto Func =
G->NodeToCallingFunc.find(
Node);
3460 assert(Func !=
G->NodeToCallingFunc.end());
3462 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3463 for (
auto &MatchingCall :
Node->MatchingCalls) {
3464 LabelString +=
"\n";
3465 LabelString +=
G->getLabel(Func->second, MatchingCall.call(),
3466 MatchingCall.cloneNo());
3469 LabelString +=
"null call";
3470 if (
Node->Recursive)
3471 LabelString +=
" (recursive)";
3473 LabelString +=
" (external)";
// --- Node attributes: tooltip with context ids, fill color by alloc
// type, larger font when highlighted, dashed blue outline for clones.
3479 auto ContextIds =
Node->getContextIds();
3483 bool Highlight =
false;
3492 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3493 getContextIds(ContextIds) +
"\"")
3497 AttributeString +=
",fontsize=\"30\"";
3499 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3501 if (
Node->CloneOf) {
3502 AttributeString +=
",color=\"blue\"";
3503 AttributeString +=
",style=\"filled,bold,dashed\"";
3505 AttributeString +=
",style=\"filled\"";
3506 return AttributeString;
// --- Edge attributes: tooltip of context ids, color by alloc type,
// dotted for backedges, thicker when highlighted.
3511 auto &Edge = *(ChildIter.getCurrent());
3516 bool Highlight =
false;
3525 auto Color = getColor(Edge->AllocTypes, Highlight);
3526 std::string AttributeString =
3527 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3529 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3532 if (Edge->IsBackedge)
3533 AttributeString +=
",style=\"dotted\"";
3536 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3537 return AttributeString;
// --- Node filtering: removed nodes are hidden from the DOT output.
3543 if (
Node->isRemoved())
// --- Context-id string helper: lists ids when few, else just the count.
3556 std::string IdString =
"ContextIds:";
3557 if (ContextIds.
size() < 100) {
3558 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3559 std::sort(SortedIds.begin(), SortedIds.end());
3560 for (
auto Id : SortedIds)
3561 IdString += (
" " +
Twine(Id)).str();
3563 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
// --- Color mapping: NotCold=red/pink, Cold=cyan/blue, both=magenta/
// orchid; the muted variant is used when highlighting is on but this
// element is not highlighted.
3568 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3574 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3576 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3577 if (AllocTypes == (uint8_t)AllocationType::Cold)
3578 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3580 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3581 return Highlight ?
"magenta" :
"mediumorchid1";
// --- Stable DOT node id from the node's address, hex-formatted.
3585 static std::string getNodeId(NodeRef Node) {
3586 std::stringstream SStream;
3587 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3588 std::string
Result = SStream.str();
// (First template header belongs to an elided definition between DOT traits
// and exportToDot.)
3597template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
// Writes the graph to a DOT file tagged with Label (body elided; presumably
// delegates to WriteGraph with the traits above — TODO confirm).
3602template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3603void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3604 std::string Label)
const {
// Clones the edge's callee node and redirects the edge (restricted to
// ContextIdsToMove) onto the fresh clone via moveEdgeToExistingCalleeClone;
// the clone inherits the original's call, function, and matching calls.
3609template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3610typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3611CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3612 const std::shared_ptr<ContextEdge> &
Edge,
3613 DenseSet<uint32_t> ContextIdsToMove) {
// (Node binding elided) Node is presumably Edge->Callee — TODO confirm.
3615 assert(NodeToCallingFunc.count(Node));
3616 ContextNode *Clone =
3617 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
// Register the clone with its origin so clone bookkeeping stays intact.
3618 Node->addClone(Clone);
3619 Clone->MatchingCalls =
Node->MatchingCalls;
3620 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3625template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3626void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3627 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3628 ContextNode *NewCallee,
bool NewClone,
3629 DenseSet<uint32_t> ContextIdsToMove) {
3632 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3634 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3636 ContextNode *OldCallee =
Edge->Callee;
3640 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3644 if (ContextIdsToMove.
empty())
3645 ContextIdsToMove =
Edge->getContextIds();
3649 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3652 NewCallee->AllocTypes |=
Edge->AllocTypes;
3654 if (ExistingEdgeToNewCallee) {
3657 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3658 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3659 assert(
Edge->ContextIds == ContextIdsToMove);
3660 removeEdgeFromGraph(
Edge.get());
3663 Edge->Callee = NewCallee;
3664 NewCallee->CallerEdges.push_back(
Edge);
3666 OldCallee->eraseCallerEdge(
Edge.get());
3673 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3674 if (ExistingEdgeToNewCallee) {
3677 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3678 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3681 auto NewEdge = std::make_shared<ContextEdge>(
3682 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3683 Edge->Caller->CalleeEdges.push_back(NewEdge);
3684 NewCallee->CallerEdges.push_back(NewEdge);
3688 NewCallee->AllocTypes |= CallerEdgeAllocType;
3690 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3695 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3696 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3700 if (CalleeToUse == OldCallee) {
3704 if (EdgeIsRecursive) {
3708 CalleeToUse = NewCallee;
3712 DenseSet<uint32_t> EdgeContextIdsToMove =
3714 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3715 OldCalleeEdge->AllocTypes =
3716 computeAllocType(OldCalleeEdge->getContextIds());
3723 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3724 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3725 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3729 auto NewEdge = std::make_shared<ContextEdge>(
3730 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3731 EdgeContextIdsToMove);
3732 NewCallee->CalleeEdges.push_back(NewEdge);
3733 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3737 OldCallee->AllocTypes = OldCallee->computeAllocType();
3739 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3740 OldCallee->emptyContextIds());
3744 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3747 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3753template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3754void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3755 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3756 ContextNode *NewCaller) {
3757 auto *OldCallee =
Edge->Callee;
3758 auto *NewCallee = OldCallee;
3761 bool Recursive =
Edge->Caller ==
Edge->Callee;
3763 NewCallee = NewCaller;
3765 ContextNode *OldCaller =
Edge->Caller;
3766 OldCaller->eraseCalleeEdge(
Edge.get());
3770 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3772 if (ExistingEdgeToNewCaller) {
3775 ExistingEdgeToNewCaller->getContextIds().insert_range(
3776 Edge->getContextIds());
3777 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3778 Edge->ContextIds.clear();
3779 Edge->AllocTypes = (uint8_t)AllocationType::None;
3780 OldCallee->eraseCallerEdge(
Edge.get());
3783 Edge->Caller = NewCaller;
3784 NewCaller->CalleeEdges.push_back(
Edge);
3786 assert(NewCallee == NewCaller);
3789 Edge->Callee = NewCallee;
3790 NewCallee->CallerEdges.push_back(
Edge);
3791 OldCallee->eraseCallerEdge(
Edge.get());
3797 NewCaller->AllocTypes |=
Edge->AllocTypes;
3804 bool IsNewNode = NewCaller->CallerEdges.empty();
3813 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3814 auto OldCallerCaller = OldCallerEdge->Caller;
3818 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3819 if (OldCaller == OldCallerCaller) {
3820 OldCallerCaller = NewCaller;
3826 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3827 OldCallerEdge->AllocTypes =
3828 computeAllocType(OldCallerEdge->getContextIds());
3833 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3837 if (ExistingCallerEdge) {
3838 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3839 ExistingCallerEdge->AllocTypes |=
3840 computeAllocType(EdgeContextIdsToMove);
3843 auto NewEdge = std::make_shared<ContextEdge>(
3844 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3845 EdgeContextIdsToMove);
3846 NewCaller->CallerEdges.push_back(NewEdge);
3847 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3852 OldCaller->AllocTypes = OldCaller->computeAllocType();
3854 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3855 OldCaller->emptyContextIds());
3859 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3862 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3868template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3869void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3870 recursivelyRemoveNoneTypeCalleeEdges(
3871 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3876 removeNoneTypeCalleeEdges(Node);
3878 for (
auto *Clone :
Node->Clones)
3879 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3883 auto CallerEdges =
Node->CallerEdges;
3884 for (
auto &
Edge : CallerEdges) {
3886 if (
Edge->isRemoved()) {
3890 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3895template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3896void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3901 DenseSet<const ContextNode *> Visited;
3902 DenseSet<const ContextNode *> CurrentStack;
3903 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3905 if (
Node->isRemoved())
3908 if (!
Node->CallerEdges.empty())
3910 markBackedges(Node, Visited, CurrentStack);
3916template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3917void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3918 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3919 DenseSet<const ContextNode *> &CurrentStack) {
3920 auto I = Visited.
insert(Node);
3924 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3925 auto *
Callee = CalleeEdge->Callee;
3926 if (Visited.
count(Callee)) {
3929 if (CurrentStack.
count(Callee))
3930 CalleeEdge->IsBackedge =
true;
3933 CurrentStack.
insert(Callee);
3934 markBackedges(Callee, Visited, CurrentStack);
3935 CurrentStack.
erase(Callee);
3939template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3940void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3941 DenseSet<const ContextNode *> Visited;
3942 for (
auto &Entry : AllocationCallToContextNodeMap) {
3944 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3947 for (
auto &Entry : AllocationCallToContextNodeMap)
3948 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3961template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3962void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3963 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3964 const DenseSet<uint32_t> &AllocContextIds) {
3974 if (!
Node->hasCall())
3993 auto CallerEdges =
Node->CallerEdges;
3994 for (
auto &
Edge : CallerEdges) {
3996 if (
Edge->isRemoved()) {
4002 if (
Edge->IsBackedge) {
4009 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
4010 identifyClones(
Edge->Caller, Visited, AllocContextIds);
4033 const unsigned AllocTypeCloningPriority[] = { 3, 4,
4037 [&](
const std::shared_ptr<ContextEdge> &
A,
4038 const std::shared_ptr<ContextEdge> &
B) {
4041 if (A->ContextIds.empty())
4047 if (B->ContextIds.empty())
4050 if (A->AllocTypes == B->AllocTypes)
4053 return *A->ContextIds.begin() < *B->ContextIds.begin();
4054 return AllocTypeCloningPriority[A->AllocTypes] <
4055 AllocTypeCloningPriority[B->AllocTypes];
4058 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4060 DenseSet<uint32_t> RecursiveContextIds;
4065 DenseSet<uint32_t> AllCallerContextIds;
4066 for (
auto &CE :
Node->CallerEdges) {
4069 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4070 for (
auto Id :
CE->getContextIds())
4071 if (!AllCallerContextIds.
insert(Id).second)
4072 RecursiveContextIds.
insert(Id);
4082 auto CallerEdges =
Node->CallerEdges;
4083 for (
auto &CallerEdge : CallerEdges) {
4085 if (CallerEdge->isRemoved()) {
4089 assert(CallerEdge->Callee == Node);
4098 if (!CallerEdge->Caller->hasCall())
4103 auto CallerEdgeContextsForAlloc =
4105 if (!RecursiveContextIds.
empty())
4106 CallerEdgeContextsForAlloc =
4108 if (CallerEdgeContextsForAlloc.empty())
4111 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4115 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4116 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4117 for (
auto &CalleeEdge :
Node->CalleeEdges)
4118 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4119 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4135 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4136 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4137 if (!CallerEdge->IsBackedge &&
4138 allocTypeToUse(CallerAllocTypeForAlloc) ==
4139 allocTypeToUse(
Node->AllocTypes) &&
4140 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4141 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4145 if (CallerEdge->IsBackedge) {
4149 DeferredBackedges++;
4162 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4163 !Visited.
count(CallerEdge->Caller)) {
4164 const auto OrigIdCount = CallerEdge->getContextIds().size();
4167 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4168 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4172 bool UpdatedEdge =
false;
4173 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4174 for (
auto E :
Node->CallerEdges) {
4176 if (
E->Caller->CloneOf != CallerEdge->Caller)
4180 auto CallerEdgeContextsForAllocNew =
4182 if (CallerEdgeContextsForAllocNew.empty())
4192 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4202 if (CallerEdge->isRemoved())
4212 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4213 if (CallerEdgeContextsForAlloc.empty())
4218 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4219 CalleeEdgeAllocTypesForCallerEdge.clear();
4220 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4221 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4222 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4228 ContextNode *Clone =
nullptr;
4229 for (
auto *CurClone :
Node->Clones) {
4230 if (allocTypeToUse(CurClone->AllocTypes) !=
4231 allocTypeToUse(CallerAllocTypeForAlloc))
4238 assert(!BothSingleAlloc ||
4239 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4245 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4246 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4254 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4255 CallerEdgeContextsForAlloc);
4257 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4260 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4267 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4273void ModuleCallsiteContextGraph::updateAllocationCall(
4278 "memprof", AllocTypeString);
4281 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4282 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4284 <<
" marked with memprof allocation attribute "
4285 <<
ore::NV(
"Attribute", AllocTypeString));
4288void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4292 assert(AI->Versions.size() >
Call.cloneNo());
4297ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4299 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4300 return AllocationType::None;
4301 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4302 ? AllocationType::Cold
4303 : AllocationType::NotCold;
4307IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4309 assert(AI->Versions.size() >
Call.cloneNo());
4313void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4314 FuncInfo CalleeFunc) {
4315 auto *CurF = getCalleeFunc(CallerCall.call());
4316 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4323 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4325 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4327 MismatchedCloneAssignments++;
4330 if (NewCalleeCloneNo > 0)
4331 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4332 OREGetter(CallerCall.call()->getFunction())
4333 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4334 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4335 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4336 <<
" assigned to call function clone "
4337 <<
ore::NV(
"Callee", CalleeFunc.func()));
4340void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4341 FuncInfo CalleeFunc) {
4344 "Caller cannot be an allocation which should not have profiled calls");
4345 assert(CI->Clones.size() > CallerCall.cloneNo());
4346 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4347 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4352 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4354 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4356 MismatchedCloneAssignments++;
4358 CurCalleeCloneNo = NewCalleeCloneNo;
4370 SP->replaceLinkageName(MDName);
4374 TempDISubprogram NewDecl = Decl->
clone();
4375 NewDecl->replaceLinkageName(MDName);
4379CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4381ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4382 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4383 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4388 assert(!
Func.func()->getParent()->getFunction(Name));
4389 NewFunc->setName(Name);
4391 for (
auto &Inst : CallsWithMetadataInFunc) {
4393 assert(Inst.cloneNo() == 0);
4396 OREGetter(
Func.func())
4397 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4398 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4399 return {NewFunc, CloneNo};
4402CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4403 IndexCall>::FuncInfo
4404IndexCallsiteContextGraph::cloneFunctionForCallsite(
4405 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4406 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4420 for (
auto &Inst : CallsWithMetadataInFunc) {
4422 assert(Inst.cloneNo() == 0);
4424 assert(AI->Versions.size() == CloneNo);
4427 AI->Versions.push_back(0);
4430 assert(CI && CI->Clones.size() == CloneNo);
4433 CI->Clones.push_back(0);
4435 CallMap[Inst] = {Inst.call(), CloneNo};
4437 return {
Func.func(), CloneNo};
4454template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4455void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4461 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4462 for (
auto &Entry : AllocationCallToContextNodeMap) {
4464 for (
auto Id :
Node->getContextIds())
4465 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4466 for (
auto *Clone :
Node->Clones) {
4467 for (
auto Id : Clone->getContextIds())
4468 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4475 DenseSet<const ContextNode *> Visited;
4476 for (
auto &Entry : AllocationCallToContextNodeMap) {
4479 mergeClones(Node, Visited, ContextIdToAllocationNode);
4485 auto Clones =
Node->Clones;
4486 for (
auto *Clone : Clones)
4487 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4491 dbgs() <<
"CCG after merging:\n";
4495 exportToDot(
"aftermerge");
4503template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4504void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4505 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4506 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4516 bool FoundUnvisited =
true;
4518 while (FoundUnvisited) {
4520 FoundUnvisited =
false;
4523 auto CallerEdges =
Node->CallerEdges;
4524 for (
auto CallerEdge : CallerEdges) {
4526 if (CallerEdge->Callee != Node)
4531 FoundUnvisited =
true;
4532 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4536 TotalMergeInvokes++;
4537 TotalMergeIters += Iters;
4538 if (Iters > MaxMergeIters)
4539 MaxMergeIters = Iters;
4542 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4545template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4546void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4547 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4548 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4550 if (
Node->emptyContextIds())
4555 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4556 OrigNodeToCloneEdges;
4557 for (
const auto &
E :
Node->CalleeEdges) {
4562 OrigNodeToCloneEdges[
Base].push_back(
E);
4568 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4569 const std::shared_ptr<ContextEdge> &
B) {
4570 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4571 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4572 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4574 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4578 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4583 for (
auto Entry : OrigNodeToCloneEdges) {
4586 auto &CalleeEdges =
Entry.second;
4587 auto NumCalleeClones = CalleeEdges.size();
4589 if (NumCalleeClones == 1)
4600 DenseSet<ContextNode *> OtherCallersToShareMerge;
4601 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4602 OtherCallersToShareMerge);
4607 ContextNode *MergeNode =
nullptr;
4608 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4609 for (
auto CalleeEdge : CalleeEdges) {
4610 auto *OrigCallee = CalleeEdge->Callee;
4616 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4617 MergeNode = OrigCallee;
4618 NonNewMergedNodes++;
4625 if (!OtherCallersToShareMerge.
empty()) {
4626 bool MoveAllCallerEdges =
true;
4627 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4628 if (CalleeCallerE == CalleeEdge)
4630 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4631 MoveAllCallerEdges =
false;
4637 if (MoveAllCallerEdges) {
4638 MergeNode = OrigCallee;
4639 NonNewMergedNodes++;
4646 assert(MergeNode != OrigCallee);
4647 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4650 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4655 if (!OtherCallersToShareMerge.
empty()) {
4659 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4660 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4661 if (CalleeCallerE == CalleeEdge)
4663 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4665 CallerToMoveCount[CalleeCallerE->Caller]++;
4666 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4670 removeNoneTypeCalleeEdges(OrigCallee);
4671 removeNoneTypeCalleeEdges(MergeNode);
4689template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4690void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4691 findOtherCallersToShareMerge(
4693 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4694 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4695 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4696 auto NumCalleeClones = CalleeEdges.size();
4699 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4702 unsigned PossibleOtherCallerNodes = 0;
4706 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4712 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4713 for (
auto CalleeEdge : CalleeEdges) {
4714 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4717 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4718 if (CalleeCallerEdges->Caller == Node) {
4719 assert(CalleeCallerEdges == CalleeEdge);
4722 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4725 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4727 PossibleOtherCallerNodes++;
4731 for (
auto Id : CalleeEdge->getContextIds()) {
4732 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4736 MissingAllocForContextId++;
4739 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4746 for (
auto CalleeEdge : CalleeEdges) {
4747 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4749 if (!PossibleOtherCallerNodes)
4751 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4753 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4755 if (CalleeCallerE == CalleeEdge)
4759 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4764 for (
auto Id : CalleeCallerE->getContextIds()) {
4765 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4770 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4771 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4772 PossibleOtherCallerNodes--;
4779 if (!PossibleOtherCallerNodes)
4784 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4785 if (
Count != NumCalleeClones)
4787 OtherCallersToShareMerge.
insert(OtherCaller);
4832template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4833bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4840 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4844 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4845 const FuncInfo &CalleeFunc) {
4847 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4851 struct FuncCloneInfo {
4856 DenseMap<CallInfo, CallInfo> CallMap;
4884 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4885 UnassignedCallClones;
4889 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4890 FuncInfo OrigFunc(Func);
4895 std::vector<FuncCloneInfo> FuncCloneInfos;
4896 for (
auto &
Call : CallsWithMetadata) {
4897 ContextNode *
Node = getNodeForInst(
Call);
4901 if (!Node ||
Node->Clones.empty())
4904 "Not having a call should have prevented cloning");
4908 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4912 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4914 ContextNode *CallsiteClone,
4917 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4919 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4920 DenseMap<CallInfo, CallInfo> &CallMap =
4921 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4922 CallInfo CallClone(
Call);
4923 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4924 CallClone = It->second;
4925 CallsiteClone->setCall(CallClone);
4927 for (
auto &MatchingCall :
Node->MatchingCalls) {
4928 CallInfo CallClone(MatchingCall);
4929 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4930 CallClone = It->second;
4932 MatchingCall = CallClone;
4940 auto MoveEdgeToNewCalleeCloneAndSetUp =
4941 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4942 ContextNode *OrigCallee =
Edge->Callee;
4943 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4944 removeNoneTypeCalleeEdges(NewClone);
4945 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4949 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4950 RecordCalleeFuncOfCallsite(
4951 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4958 std::deque<ContextNode *> ClonesWorklist;
4960 if (!
Node->emptyContextIds())
4961 ClonesWorklist.push_back(Node);
4967 unsigned NodeCloneCount = 0;
4968 while (!ClonesWorklist.empty()) {
4969 ContextNode *Clone = ClonesWorklist.front();
4970 ClonesWorklist.pop_front();
4979 if (FuncCloneInfos.size() < NodeCloneCount) {
4981 if (NodeCloneCount == 1) {
4986 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4987 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4991 FuncCloneInfos.push_back(
4992 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4993 AssignCallsiteCloneToFuncClone(
4994 OrigFunc,
Call, Clone,
4995 AllocationCallToContextNodeMap.count(
Call));
4996 for (
auto &CE : Clone->CallerEdges) {
4998 if (!
CE->Caller->hasCall())
5000 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
5010 FuncInfo PreviousAssignedFuncClone;
5012 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5013 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5015 bool CallerAssignedToCloneOfFunc =
false;
5016 if (EI != Clone->CallerEdges.end()) {
5017 const std::shared_ptr<ContextEdge> &
Edge = *EI;
5018 PreviousAssignedFuncClone =
5019 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5020 CallerAssignedToCloneOfFunc =
true;
5025 DenseMap<CallInfo, CallInfo> NewCallMap;
5026 unsigned CloneNo = FuncCloneInfos.size();
5027 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
5028 "should already exist in the map");
5029 FuncInfo NewFuncClone = cloneFunctionForCallsite(
5030 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
5031 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
5032 FunctionClonesAnalysis++;
5038 if (!CallerAssignedToCloneOfFunc) {
5039 AssignCallsiteCloneToFuncClone(
5040 NewFuncClone,
Call, Clone,
5041 AllocationCallToContextNodeMap.count(
Call));
5042 for (
auto &CE : Clone->CallerEdges) {
5044 if (!
CE->Caller->hasCall())
5046 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5058 auto CallerEdges = Clone->CallerEdges;
5059 for (
auto CE : CallerEdges) {
5061 if (
CE->isRemoved()) {
5067 if (!
CE->Caller->hasCall())
5070 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5074 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5075 PreviousAssignedFuncClone)
5078 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5091 auto CalleeEdges =
CE->Caller->CalleeEdges;
5092 for (
auto CalleeEdge : CalleeEdges) {
5095 if (CalleeEdge->isRemoved()) {
5100 ContextNode *
Callee = CalleeEdge->Callee;
5104 if (Callee == Clone || !
Callee->hasCall())
5109 if (Callee == CalleeEdge->Caller)
5111 ContextNode *NewClone =
5112 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5115 removeNoneTypeCalleeEdges(Callee);
5123 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5124 OrigCall.setCloneNo(0);
5125 DenseMap<CallInfo, CallInfo> &CallMap =
5126 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5128 CallInfo NewCall(CallMap[OrigCall]);
5130 NewClone->setCall(NewCall);
5132 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5133 CallInfo OrigMatchingCall(MatchingCall);
5134 OrigMatchingCall.setCloneNo(0);
5136 CallInfo NewCall(CallMap[OrigMatchingCall]);
5139 MatchingCall = NewCall;
5148 auto FindFirstAvailFuncClone = [&]() {
5153 for (
auto &CF : FuncCloneInfos) {
5154 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5155 return CF.FuncClone;
5158 "Expected an available func clone for this callsite clone");
5175 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5176 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5180 auto CloneCallerEdges = Clone->CallerEdges;
5181 for (
auto &
Edge : CloneCallerEdges) {
5185 if (
Edge->isRemoved())
5188 if (!
Edge->Caller->hasCall())
5192 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5193 FuncInfo FuncCloneCalledByCaller =
5194 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5204 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5205 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5213 (FuncCloneAssignedToCurCallsiteClone &&
5214 FuncCloneAssignedToCurCallsiteClone !=
5215 FuncCloneCalledByCaller)) {
5230 if (FuncCloneToNewCallsiteCloneMap.count(
5231 FuncCloneCalledByCaller)) {
5232 ContextNode *NewClone =
5233 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5234 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5236 removeNoneTypeCalleeEdges(NewClone);
5239 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5240 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5243 ClonesWorklist.push_back(NewClone);
5247 removeNoneTypeCalleeEdges(Clone);
5255 if (!FuncCloneAssignedToCurCallsiteClone) {
5256 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5258 AssignCallsiteCloneToFuncClone(
5259 FuncCloneCalledByCaller,
Call, Clone,
5260 AllocationCallToContextNodeMap.count(
Call));
5264 assert(FuncCloneAssignedToCurCallsiteClone ==
5265 FuncCloneCalledByCaller);
5274 if (!FuncCloneAssignedToCurCallsiteClone) {
5275 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5276 assert(FuncCloneAssignedToCurCallsiteClone);
5278 AssignCallsiteCloneToFuncClone(
5279 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5280 AllocationCallToContextNodeMap.count(
Call));
5282 assert(FuncCloneToCurNodeCloneMap
5283 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5285 RecordCalleeFuncOfCallsite(
Edge->Caller,
5286 FuncCloneAssignedToCurCallsiteClone);
5306 if (!FuncCloneAssignedToCurCallsiteClone) {
5307 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5308 assert(FuncCloneAssignedToCurCallsiteClone &&
5309 "No available func clone for this callsite clone");
5310 AssignCallsiteCloneToFuncClone(
5311 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5312 AllocationCallToContextNodeMap.contains(
Call));
5317 for (
const auto &PE :
Node->CalleeEdges)
5319 for (
const auto &CE :
Node->CallerEdges)
5321 for (
auto *Clone :
Node->Clones) {
5323 for (
const auto &PE : Clone->CalleeEdges)
5325 for (
const auto &CE : Clone->CallerEdges)
5331 if (FuncCloneInfos.size() < 2)
5337 for (
auto &
Call : CallsWithMetadata) {
5338 ContextNode *
Node = getNodeForInst(
Call);
5339 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5345 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5349 DenseSet<unsigned> NodeCallClones;
5350 for (
auto *
C :
Node->Clones)
5351 NodeCallClones.
insert(
C->Call.cloneNo());
5354 for (
auto &FC : FuncCloneInfos) {
5359 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5364 auto &CallVector = UnassignedCallClones[
Node][
I];
5365 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5366 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5367 CallInfo CallClone = It->second;
5368 CallVector.push_back(CallClone);
5372 assert(
false &&
"Expected to find call in CallMap");
5375 for (
auto &MatchingCall :
Node->MatchingCalls) {
5376 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5377 CallInfo CallClone = It->second;
5378 CallVector.push_back(CallClone);
5382 assert(
false &&
"Expected to find call in CallMap");
5390 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5392 auto UpdateCalls = [&](ContextNode *
Node,
5393 DenseSet<const ContextNode *> &Visited,
5394 auto &&UpdateCalls) {
5395 auto Inserted = Visited.insert(Node);
5399 for (
auto *Clone :
Node->Clones)
5400 UpdateCalls(Clone, Visited, UpdateCalls);
5402 for (
auto &
Edge :
Node->CallerEdges)
5403 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5407 if (!
Node->hasCall() ||
Node->emptyContextIds())
5410 if (
Node->IsAllocation) {
5411 auto AT = allocTypeToUse(
Node->AllocTypes);
5417 !ContextIdToContextSizeInfos.empty()) {
5418 uint64_t TotalCold = 0;
5420 for (
auto Id :
Node->getContextIds()) {
5421 auto TypeI = ContextIdToAllocationType.find(Id);
5422 assert(TypeI != ContextIdToAllocationType.end());
5423 auto CSI = ContextIdToContextSizeInfos.find(Id);
5424 if (CSI != ContextIdToContextSizeInfos.end()) {
5425 for (
auto &Info : CSI->second) {
5427 if (TypeI->second == AllocationType::Cold)
5428 TotalCold +=
Info.TotalSize;
5433 AT = AllocationType::Cold;
5435 updateAllocationCall(
Node->Call, AT);
5440 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5443 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5444 updateCall(
Node->Call, CalleeFunc);
5446 for (
auto &
Call :
Node->MatchingCalls)
5447 updateCall(
Call, CalleeFunc);
5451 if (!UnassignedCallClones.
contains(Node))
5453 DenseSet<unsigned> NodeCallClones;
5454 for (
auto *
C :
Node->Clones)
5455 NodeCallClones.
insert(
C->Call.cloneNo());
5457 auto &ClonedCalls = UnassignedCallClones[
Node];
5458 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5462 if (NodeCallClones.
contains(CloneNo))
5465 for (
auto &
Call : CallVector)
5466 updateCall(
Call, CalleeFunc);
5475 DenseSet<const ContextNode *> Visited;
5476 for (
auto &Entry : AllocationCallToContextNodeMap)
5477 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5488 for (
auto &SN : FS->callsites()) {
5493 SN.Clones.size() >
I &&
5494 "Callsite summary has fewer entries than other summaries in function");
5495 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5502 for (
auto &AN : FS->allocs()) {
5506 assert(AN.Versions.size() >
I &&
5507 "Alloc summary has fewer entries than other summaries in function");
5508 if (AN.Versions.size() <=
I ||
5525 NewGV->takeName(DeclGV);
5532 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5533 if (!FuncToAliasMap.count(&
F))
5535 for (
auto *
A : FuncToAliasMap[&
F]) {
5537 auto *PrevA = M.getNamedAlias(AliasName);
5539 A->getType()->getPointerAddressSpace(),
5540 A->getLinkage(), AliasName, NewF);
5541 NewA->copyAttributesFrom(
A);
5543 TakeDeclNameAndReplace(PrevA, NewA);
5552 FunctionsClonedThinBackend++;
5569 for (
unsigned I = 1;
I < NumClones;
I++) {
5570 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5577 FunctionCloneDuplicatesThinBackend++;
5578 auto *Func = HashToFunc[Hash];
5579 if (Func->hasAvailableExternallyLinkage()) {
5585 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5587 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5590 auto *PrevF = M.getFunction(Name);
5593 TakeDeclNameAndReplace(PrevF, Alias);
5595 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5598 CloneFuncAliases(Func,
I);
5602 HashToFunc[Hash] = NewF;
5603 FunctionClonesThinBackend++;
5606 for (
auto &BB : *NewF) {
5607 for (
auto &Inst : BB) {
5608 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5609 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5614 TakeDeclNameAndReplace(PrevF, NewF);
5616 NewF->setName(Name);
5619 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5622 CloneFuncAliases(NewF,
I);
5631 const Function *CallingFunc =
nullptr) {
5650 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5656 if (!SrcFileMD &&
F.isDeclaration()) {
5660 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5665 assert(SrcFileMD || OrigName ==
F.getName());
5667 StringRef SrcFile = M.getSourceFileName();
5679 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5680 F.getName().contains(
'.')) {
5681 OrigName =
F.getName().rsplit(
'.').first;
5690 assert(TheFnVI ||
F.isDeclaration());
5694bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5696 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5697 Symtab = std::make_unique<InstrProfSymtab>();
5708 if (
Error E = Symtab->create(M,
true,
false)) {
5709 std::string SymtabFailure =
toString(std::move(
E));
5710 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5723 auto MIBIter = AllocNode.
MIBs.begin();
5724 for (
auto &MDOp : MemProfMD->
operands()) {
5726 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5731 auto ContextIterBegin =
5735 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5737 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5742 if (LastStackContextId == *ContextIter)
5744 LastStackContextId = *ContextIter;
5745 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5755bool MemProfContextDisambiguation::applyImport(
Module &M) {
5762 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5764 for (
auto &
A :
M.aliases()) {
5765 auto *Aliasee =
A.getAliaseeObject();
5767 FuncToAliasMap[
F].insert(&
A);
5770 if (!initializeIndirectCallPromotionInfo(M))
5777 OptimizationRemarkEmitter ORE(&
F);
5780 bool ClonesCreated =
false;
5781 unsigned NumClonesCreated = 0;
5782 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5792 if (ClonesCreated) {
5793 assert(NumClonesCreated == NumClones);
5800 ClonesCreated =
true;
5801 NumClonesCreated = NumClones;
5804 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5805 Function *CalledFunction, FunctionSummary *
FS) {
5807 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5819 if (CalledFunction != CB->getCalledOperand() &&
5820 (!GA || CalledFunction != GA->getAliaseeObject())) {
5821 SkippedCallsCloning++;
5827 auto CalleeOrigName = CalledFunction->getName();
5828 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5831 if (J > 0 && VMaps[J - 1]->
empty())
5835 if (!StackNode.
Clones[J])
5837 auto NewF =
M.getOrInsertFunction(
5839 CalledFunction->getFunctionType());
5853 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5854 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5856 <<
" assigned to call function clone "
5857 <<
ore::NV(
"Callee", NewF.getCallee()));
5871 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5875 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5877 "enable-import-metadata is needed to emit thinlto_src_module");
5878 StringRef SrcModule =
5881 if (GVS->modulePath() == SrcModule) {
5882 GVSummary = GVS.get();
5886 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5896 if (
FS->allocs().empty() &&
FS->callsites().empty())
5899 auto SI =
FS->callsites().begin();
5900 auto AI =
FS->allocs().begin();
5905 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5908 for (
auto CallsiteIt =
FS->callsites().rbegin();
5909 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5910 auto &Callsite = *CallsiteIt;
5914 if (!Callsite.StackIdIndices.empty())
5916 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5925 for (
auto &BB :
F) {
5926 for (
auto &
I : BB) {
5932 auto *CalledValue = CB->getCalledOperand();
5933 auto *CalledFunction = CB->getCalledFunction();
5934 if (CalledValue && !CalledFunction) {
5935 CalledValue = CalledValue->stripPointerCasts();
5942 assert(!CalledFunction &&
5943 "Expected null called function in callsite for alias");
5947 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5948 I.getMetadata(LLVMContext::MD_callsite));
5949 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5955 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5956 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5957 ? AllocTypeColdThinBackend++
5958 : AllocTypeNotColdThinBackend++;
5959 OrigAllocsThinBackend++;
5960 AllocVersionsThinBackend++;
5961 if (!MaxAllocVersionsThinBackend)
5962 MaxAllocVersionsThinBackend = 1;
5969 auto &AllocNode = *(AI++);
5977 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5979 OrigAllocsThinBackend++;
5980 AllocVersionsThinBackend += AllocNode.Versions.size();
5981 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5982 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5992 if (AllocNode.Versions.size() == 1 &&
5995 AllocationType::NotCold ||
5997 AllocationType::None);
5998 UnclonableAllocsThinBackend++;
6004 return Type == ((uint8_t)AllocationType::NotCold |
6005 (uint8_t)AllocationType::Cold);
6009 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
6012 if (J > 0 && VMaps[J - 1]->
empty())
6015 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
6018 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
6019 : AllocTypeNotColdThinBackend++;
6034 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
6035 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
6037 <<
" marked with memprof allocation attribute "
6038 <<
ore::NV(
"Attribute", AllocTypeString));
6040 }
else if (!CallsiteContext.empty()) {
6041 if (!CalledFunction) {
6045 assert(!CI || !CI->isInlineAsm());
6055 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6061 CloneFuncIfNeeded(NumClones, FS);
6066 assert(SI !=
FS->callsites().end());
6067 auto &StackNode = *(
SI++);
6073 for (
auto StackId : CallsiteContext) {
6075 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6081 CloneCallsite(StackNode, CB, CalledFunction, FS);
6083 }
else if (CB->isTailCall() && CalledFunction) {
6086 ValueInfo CalleeVI =
6088 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6089 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6090 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6091 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6098 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6108 for (
auto &BB :
F) {
6109 for (
auto &
I : BB) {
6112 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6113 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6121unsigned MemProfContextDisambiguation::recordICPInfo(
6126 uint32_t NumCandidates;
6127 uint64_t TotalCount;
6128 auto CandidateProfileData =
6129 ICallAnalysis->getPromotionCandidatesForInstruction(
6131 if (CandidateProfileData.empty())
6137 bool ICPNeeded =
false;
6138 unsigned NumClones = 0;
6139 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6140 for (
const auto &Candidate : CandidateProfileData) {
6142 auto CalleeValueInfo =
6144 ImportSummary->getValueInfo(Candidate.Value);
6147 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6149 auto &StackNode = *(
SI++);
6154 [](
unsigned CloneNo) { return CloneNo != 0; });
6164 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6165 TotalCount, CallsiteInfoStartIndex});
6169void MemProfContextDisambiguation::performICP(
6171 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6173 OptimizationRemarkEmitter &ORE) {
6180 for (
auto &Info : ICallAnalysisInfo) {
6183 auto TotalCount =
Info.TotalCount;
6184 unsigned NumPromoted = 0;
6185 unsigned NumClones = 0;
6187 for (
auto &Candidate :
Info.CandidateProfileData) {
6198 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6199 if (TargetFunction ==
nullptr ||
6207 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6208 <<
"Memprof cannot promote indirect call: target with md5sum "
6209 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6218 const char *Reason =
nullptr;
6221 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6222 <<
"Memprof cannot promote indirect call to "
6223 <<
ore::NV(
"TargetFunction", TargetFunction)
6224 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6235 CallBase *CBClone = CB;
6236 for (
unsigned J = 0; J < NumClones; J++) {
6239 if (J > 0 && VMaps[J - 1]->
empty())
6249 TotalCount, isSamplePGO, &ORE);
6250 auto *TargetToUse = TargetFunction;
6253 if (StackNode.
Clones[J]) {
6272 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6274 <<
" promoted and assigned to call function clone "
6275 <<
ore::NV(
"Callee", TargetToUse));
6279 TotalCount -= Candidate.Count;
6284 CallBase *CBClone = CB;
6285 for (
unsigned J = 0; J < NumClones; J++) {
6288 if (J > 0 && VMaps[J - 1]->
empty())
6294 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6297 if (TotalCount != 0)
6299 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6300 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6305template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6306bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
6307 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark) {
6309 dbgs() <<
"CCG before cloning:\n";
6313 exportToDot(
"postbuild");
6326 dbgs() <<
"CCG after cloning:\n";
6330 exportToDot(
"cloned");
6332 bool Changed = assignFunctions();
6335 dbgs() <<
"CCG after assigning function clones:\n";
6339 exportToDot(
"clonefuncassign");
6342 printTotalSizes(
errs(), EmitRemark);
6347bool MemProfContextDisambiguation::processModule(
6349 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6354 return applyImport(M);
6367 ModuleCallsiteContextGraph CCG(M, OREGetter);
6368 return CCG.process();
6373 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6378 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6382 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6386 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6387 "-memprof-dot-context-id");
6388 if (ImportSummary) {
6398 auto ReadSummaryFile =
6400 if (!ReadSummaryFile) {
6407 if (!ImportSummaryForTestingOrErr) {
6413 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6414 ImportSummary = ImportSummaryForTesting.get();
6423 if (!processModule(M, OREGetter))
6441 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6442 CCG.process(EmitRemark);
6457 for (
auto &BB :
F) {
6458 for (
auto &
I : BB) {
6462 if (CI->hasFnAttr(
"memprof")) {
6463 CI->removeFnAttr(
"memprof");
6466 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6467 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6473 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6474 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
bool isWeakForLinker() const
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given value type T.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automatically dereferenced, and are not involved with the template selection process.