52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
115 "Number of aliasees prevailing in a different module than its alias");
120 cl::desc(
"Specify the path prefix of the MemProf dot files."));
124 cl::desc(
"Export graph to dot files."));
129 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
139 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
144 "Export only nodes with contexts feeding given "
145 "-memprof-dot-alloc-id"),
147 "Export only nodes with given -memprof-dot-context-id")));
151 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
152 "or to highlight if -memprof-dot-scope=all"));
156 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
157 "highlight otherwise"));
161 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
165 cl::desc(
"Perform verification checks on CallingContextGraph."));
169 cl::desc(
"Perform frequent verification checks on nodes."));
172 "memprof-import-summary",
173 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
179 cl::desc(
"Max depth to recursively search for missing "
180 "frames through tail calls."));
185 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
189 cl::desc(
"Allow cloning of contexts through recursive cycles"));
196 cl::desc(
"Merge clones before assigning functions"));
205 cl::desc(
"Allow cloning of contexts having recursive cycles"));
211 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
215 "enable-memprof-context-disambiguation",
cl::Hidden,
216 cl::desc(
"Enable MemProf context disambiguation"));
222 cl::desc(
"Linking with hot/cold operator new interfaces"));
227 "Require target function definition when promoting indirect calls"));
234 cl::desc(
"Number of largest cold contexts to consider important"));
238 cl::desc(
"Enables edge fixup for important contexts"));
260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
261class CallsiteContextGraph {
263 CallsiteContextGraph() =
default;
264 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
265 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
269 EmitRemark =
nullptr,
270 bool AllowExtraAnalysis =
false);
274 void identifyClones();
281 bool assignFunctions();
287 EmitRemark =
nullptr)
const;
290 const CallsiteContextGraph &CCG) {
296 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
298 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
300 void exportToDot(std::string Label)
const;
303 struct FuncInfo final
304 :
public std::pair<FuncTy *, unsigned > {
305 using Base = std::pair<FuncTy *, unsigned>;
307 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
308 explicit operator bool()
const {
return this->first !=
nullptr; }
309 FuncTy *func()
const {
return this->first; }
310 unsigned cloneNo()
const {
return this->second; }
314 struct CallInfo final :
public std::pair<CallTy, unsigned > {
315 using Base = std::pair<CallTy, unsigned>;
317 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
319 explicit operator bool()
const {
return (
bool)this->first; }
320 CallTy call()
const {
return this->first; }
321 unsigned cloneNo()
const {
return this->second; }
322 void setCloneNo(
unsigned N) { this->second =
N; }
324 if (!
operator bool()) {
330 OS <<
"\t(clone " << cloneNo() <<
")";
356 bool Recursive =
false;
383 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
387 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
391 bool useCallerEdgesForContextInfo()
const {
396 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
414 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
415 Count += Edge->getContextIds().size();
419 CalleeEdges, useCallerEdgesForContextInfo()
421 : std::vector<std::shared_ptr<ContextEdge>>());
422 for (
const auto &Edge : Edges)
429 uint8_t computeAllocType()
const {
434 CalleeEdges, useCallerEdgesForContextInfo()
436 : std::vector<std::shared_ptr<ContextEdge>>());
437 for (
const auto &Edge : Edges) {
448 bool emptyContextIds()
const {
450 CalleeEdges, useCallerEdgesForContextInfo()
452 : std::vector<std::shared_ptr<ContextEdge>>());
453 for (
const auto &Edge : Edges) {
454 if (!Edge->getContextIds().empty())
461 std::vector<ContextNode *> Clones;
464 ContextNode *CloneOf =
nullptr;
466 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
468 ContextNode(
bool IsAllocation, CallInfo
C)
469 : IsAllocation(IsAllocation),
Call(
C) {}
471 void addClone(ContextNode *Clone) {
473 CloneOf->Clones.push_back(Clone);
474 Clone->CloneOf = CloneOf;
476 Clones.push_back(Clone);
478 Clone->CloneOf =
this;
482 ContextNode *getOrigNode() {
489 unsigned int ContextId);
491 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
492 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
493 void eraseCalleeEdge(
const ContextEdge *Edge);
494 void eraseCallerEdge(
const ContextEdge *Edge);
496 void setCall(CallInfo
C) {
Call = std::move(
C); }
498 bool hasCall()
const {
return (
bool)
Call.call(); }
504 bool isRemoved()
const {
540 bool IsBackedge =
false;
547 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
548 ContextIds(std::move(ContextIds)) {}
554 inline void clear() {
564 inline bool isRemoved()
const {
565 if (Callee || Caller)
586 void removeNoneTypeCalleeEdges(ContextNode *
Node);
587 void removeNoneTypeCallerEdges(ContextNode *
Node);
589 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
595 template <
class NodeT,
class IteratorT>
596 std::vector<uint64_t>
601 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
604 template <
class NodeT,
class IteratorT>
605 void addStackNodesForMIB(
609 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
614 void updateStackNodes();
623 void fixupImportantContexts();
627 void handleCallsitesWithMultipleTargets();
630 void markBackedges();
640 bool partitionCallsByCallee(
642 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
649 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
656 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
661 struct CallContextInfo {
665 std::vector<uint64_t> StackIds;
679 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
680 bool CalleeIter =
true);
688 void assignStackNodesPostOrder(
702 void propagateDuplicateContextIds(
708 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
716 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
726 calleesMatch(CallTy
Call, EdgeIter &EI,
731 const FuncTy *getCalleeFunc(CallTy
Call) {
732 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
738 bool calleeMatchesFunc(
739 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
740 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
741 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
742 Call, Func, CallerFunc, FoundCalleeChain);
746 bool sameCallee(CallTy Call1, CallTy Call2) {
747 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
752 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
753 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
759 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
765 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
770 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
775 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
776 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
782 FuncInfo cloneFunctionForCallsite(
784 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
785 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
786 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
791 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
792 unsigned CloneNo)
const {
793 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
797 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
798 CallInfo
C = CallInfo()) {
799 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
800 auto *NewNode = NodeOwner.back().get();
802 NodeToCallingFunc[NewNode] =
F;
803 NewNode->NodeId = NodeOwner.size();
808 ContextNode *getNodeForInst(
const CallInfo &
C);
809 ContextNode *getNodeForAlloc(
const CallInfo &
C);
810 ContextNode *getNodeForStackId(
uint64_t StackId);
832 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
839 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
840 ContextNode *NewCallee,
841 bool NewClone =
false,
849 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
850 ContextNode *NewCaller);
861 void mergeNodeCalleeClones(
866 void findOtherCallersToShareMerge(
867 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
895 struct ImportantContextInfo {
897 std::vector<uint64_t> StackIds;
900 unsigned MaxLength = 0;
904 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
913 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
927 auto Size = StackIds.size();
928 for (
auto Id : Ids) {
929 auto &Entry = ImportantContextIdInfo[Id];
930 Entry.StackIdsToNode[StackIds] =
Node;
932 if (
Size > Entry.MaxLength)
933 Entry.MaxLength =
Size;
942 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
948 unsigned int LastContextId = 0;
951template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
954template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
956 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
957template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
959 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
960template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
962 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
965class ModuleCallsiteContextGraph
966 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
969 ModuleCallsiteContextGraph(
971 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
974 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
977 uint64_t getStackId(uint64_t IdOrIndex)
const;
979 bool calleeMatchesFunc(
980 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
981 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
982 bool sameCallee(Instruction *Call1, Instruction *Call2);
983 bool findProfiledCalleeThroughTailCalls(
984 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
985 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
986 bool &FoundMultipleCalleeChains);
987 uint64_t getLastStackId(Instruction *
Call);
988 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
991 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
992 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
994 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
995 DenseMap<CallInfo, CallInfo> &CallMap,
996 std::vector<CallInfo> &CallsWithMetadataInFunc,
998 std::string getLabel(
const Function *Func,
const Instruction *
Call,
999 unsigned CloneNo)
const;
1002 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1008struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1009 IndexCall() : PointerUnion() {}
1010 IndexCall(std::nullptr_t) : IndexCall() {}
1011 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1012 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1013 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1015 IndexCall *operator->() {
return this; }
1017 void print(raw_ostream &OS)
const {
1018 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1043class IndexCallsiteContextGraph
1044 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1047 IndexCallsiteContextGraph(
1048 ModuleSummaryIndex &Index,
1052 ~IndexCallsiteContextGraph() {
1057 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1059 for (
auto &Callsite :
I.second)
1060 FS->addCallsite(std::move(*Callsite.second));
1065 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1068 uint64_t getStackId(uint64_t IdOrIndex)
const;
1069 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1070 bool calleeMatchesFunc(
1071 IndexCall &
Call,
const FunctionSummary *Func,
1072 const FunctionSummary *CallerFunc,
1073 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1074 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1075 bool findProfiledCalleeThroughTailCalls(
1076 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1077 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1078 bool &FoundMultipleCalleeChains);
1079 uint64_t getLastStackId(IndexCall &
Call);
1080 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1083 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1084 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1085 IndexCall>::FuncInfo
1086 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1087 DenseMap<CallInfo, CallInfo> &CallMap,
1088 std::vector<CallInfo> &CallsWithMetadataInFunc,
1090 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1091 unsigned CloneNo)
const;
1092 DenseSet<GlobalValue::GUID> findAliaseeGUIDsPrevailingInDifferentModule();
1096 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1098 const ModuleSummaryIndex &Index;
1106 std::unordered_map<FunctionSummary *,
1107 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1108 FunctionCalleesToSynthesizedCallsiteInfos;
1119 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1122 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1143template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1144bool allocTypesMatch(
1145 const std::vector<uint8_t> &InAllocTypes,
1146 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1150 assert(InAllocTypes.size() == Edges.size());
1152 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1154 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1158 if (l == (uint8_t)AllocationType::None ||
1159 r->AllocTypes == (uint8_t)AllocationType::None)
1161 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1170template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1171bool allocTypesMatchClone(
1172 const std::vector<uint8_t> &InAllocTypes,
1173 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1174 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1178 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1182 for (
const auto &
E : Clone->CalleeEdges) {
1184 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1188 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1189 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1191 if (Iter == EdgeCalleeMap.
end())
1199 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1207template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1208typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1209CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1210 const CallInfo &
C) {
1211 ContextNode *
Node = getNodeForAlloc(
C);
1215 return NonAllocationCallToContextNodeMap.lookup(
C);
1218template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1219typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1220CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1221 const CallInfo &
C) {
1222 return AllocationCallToContextNodeMap.lookup(
C);
1225template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1226typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1227CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1229 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1230 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1231 return StackEntryNode->second;
1235template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1236void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1238 unsigned int ContextId) {
1239 for (
auto &
Edge : CallerEdges) {
1240 if (
Edge->Caller == Caller) {
1242 Edge->getContextIds().insert(ContextId);
1246 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1247 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1248 CallerEdges.push_back(
Edge);
1252template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1253void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1254 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1270 auto CalleeCallerCount =
Callee->CallerEdges.size();
1271 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1276 }
else if (CalleeIter) {
1278 *EI =
Caller->CalleeEdges.erase(*EI);
1281 *EI =
Callee->CallerEdges.erase(*EI);
1283 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1284 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1287template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1288void CallsiteContextGraph<
1289 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1290 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1292 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1294 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1300template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1301void CallsiteContextGraph<
1302 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1303 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1305 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1307 Edge->Caller->eraseCalleeEdge(
Edge.get());
1308 EI =
Node->CallerEdges.erase(EI);
1314template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1315typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1316CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1317 findEdgeFromCallee(
const ContextNode *Callee) {
1318 for (
const auto &
Edge : CalleeEdges)
1319 if (
Edge->Callee == Callee)
1324template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1325typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1326CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1327 findEdgeFromCaller(
const ContextNode *Caller) {
1328 for (
const auto &
Edge : CallerEdges)
1329 if (
Edge->Caller == Caller)
1334template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1335void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1336 eraseCalleeEdge(
const ContextEdge *
Edge) {
1338 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1339 return CalleeEdge.get() ==
Edge;
1341 assert(EI != CalleeEdges.end());
1342 CalleeEdges.erase(EI);
1345template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1346void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1347 eraseCallerEdge(
const ContextEdge *
Edge) {
1349 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1350 return CallerEdge.get() ==
Edge;
1352 assert(EI != CallerEdges.end());
1353 CallerEdges.erase(EI);
1356template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1357uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1358 DenseSet<uint32_t> &ContextIds)
const {
1360 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1361 uint8_t
AllocType = (uint8_t)AllocationType::None;
1362 for (
auto Id : ContextIds) {
1363 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1371template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1373CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1374 const DenseSet<uint32_t> &Node1Ids,
1375 const DenseSet<uint32_t> &Node2Ids)
const {
1377 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1378 uint8_t
AllocType = (uint8_t)AllocationType::None;
1379 for (
auto Id : Node1Ids) {
1380 if (!Node2Ids.
count(Id))
1382 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1390template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1391uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1392 const DenseSet<uint32_t> &Node1Ids,
1393 const DenseSet<uint32_t> &Node2Ids)
const {
1394 if (Node1Ids.
size() < Node2Ids.
size())
1395 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1397 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1400template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1401typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1402CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1403 CallInfo
Call,
const FuncTy *
F) {
1405 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1406 AllocationCallToContextNodeMap[
Call] = AllocNode;
1408 AllocNode->OrigStackOrAllocId = LastContextId;
1411 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1427template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1428template <
class NodeT,
class IteratorT>
1429void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1430 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1433 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1439 ContextIdToAllocationType[++LastContextId] =
AllocType;
1441 bool IsImportant =
false;
1442 if (!ContextSizeInfo.
empty()) {
1443 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1447 uint64_t TotalCold = 0;
1448 for (
auto &CSI : ContextSizeInfo)
1449 TotalCold += CSI.TotalSize;
1455 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1458 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1459 TotalSizeToContextIdTopNCold.erase(
1460 TotalSizeToContextIdTopNCold.begin());
1461 assert(ImportantContextIdInfo.count(IdToRemove));
1462 ImportantContextIdInfo.erase(IdToRemove);
1464 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1468 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1472 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1477 ContextNode *PrevNode = AllocNode;
1481 SmallSet<uint64_t, 8> StackIdSet;
1484 ContextIter != StackContext.
end(); ++ContextIter) {
1485 auto StackId = getStackId(*ContextIter);
1487 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1488 ContextNode *StackNode = getNodeForStackId(StackId);
1490 StackNode = createNewNode(
false);
1491 StackEntryIdToContextNodeMap[StackId] = StackNode;
1492 StackNode->OrigStackOrAllocId = StackId;
1497 auto Ins = StackIdSet.
insert(StackId);
1499 StackNode->Recursive =
true;
1501 StackNode->AllocTypes |= (uint8_t)
AllocType;
1502 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1503 PrevNode = StackNode;
1507template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1509CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1510 const DenseSet<uint32_t> &StackSequenceContextIds,
1511 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1512 DenseSet<uint32_t> NewContextIds;
1513 for (
auto OldId : StackSequenceContextIds) {
1514 NewContextIds.
insert(++LastContextId);
1515 OldToNewContextIds[OldId].insert(LastContextId);
1516 assert(ContextIdToAllocationType.count(OldId));
1518 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1519 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1520 if (CSI != ContextIdToContextSizeInfos.end())
1521 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1522 if (DotAllocContextIds.
contains(OldId))
1523 DotAllocContextIds.
insert(LastContextId);
1525 return NewContextIds;
1528template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1529void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1530 propagateDuplicateContextIds(
1531 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1533 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1534 DenseSet<uint32_t> NewIds;
1535 for (
auto Id : ContextIds)
1536 if (
auto NewId = OldToNewContextIds.find(Id);
1537 NewId != OldToNewContextIds.end())
1543 auto UpdateCallers = [&](ContextNode *
Node,
1544 DenseSet<const ContextEdge *> &Visited,
1545 auto &&UpdateCallers) ->
void {
1546 for (
const auto &
Edge :
Node->CallerEdges) {
1550 ContextNode *NextNode =
Edge->Caller;
1551 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1554 if (!NewIdsToAdd.
empty()) {
1555 Edge->getContextIds().insert_range(NewIdsToAdd);
1556 UpdateCallers(NextNode, Visited, UpdateCallers);
1561 DenseSet<const ContextEdge *> Visited;
1562 for (
auto &Entry : AllocationCallToContextNodeMap) {
1564 UpdateCallers(Node, Visited, UpdateCallers);
1568template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1569void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1570 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1573 DenseSet<uint32_t> RemainingContextIds) {
1575 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1576 DenseSet<uint32_t> RecursiveContextIds;
1577 DenseSet<uint32_t> AllCallerContextIds;
1582 for (
auto &CE : OrigEdges) {
1583 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1584 for (
auto Id :
CE->getContextIds())
1585 if (!AllCallerContextIds.
insert(Id).second)
1586 RecursiveContextIds.
insert(Id);
1590 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1592 DenseSet<uint32_t> NewEdgeContextIds;
1593 DenseSet<uint32_t> NotFoundContextIds;
1597 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1598 NotFoundContextIds);
1601 if (RecursiveContextIds.
empty()) {
1604 RemainingContextIds.
swap(NotFoundContextIds);
1614 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1616 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1619 if (NewEdgeContextIds.
empty()) {
1623 if (TowardsCallee) {
1624 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1625 auto NewEdge = std::make_shared<ContextEdge>(
1626 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1627 NewNode->CalleeEdges.push_back(NewEdge);
1628 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1630 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1631 auto NewEdge = std::make_shared<ContextEdge>(
1632 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1633 NewNode->CallerEdges.push_back(NewEdge);
1634 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1637 if (
Edge->getContextIds().empty()) {
1638 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1645template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1647 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1651 assert(!Edge->ContextIds.empty());
1654template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1656 bool CheckEdges =
true) {
1657 if (
Node->isRemoved())
1661 auto NodeContextIds =
Node->getContextIds();
1665 if (
Node->CallerEdges.size()) {
1667 Node->CallerEdges.front()->ContextIds);
1671 set_union(CallerEdgeContextIds, Edge->ContextIds);
1678 NodeContextIds == CallerEdgeContextIds ||
1681 if (
Node->CalleeEdges.size()) {
1683 Node->CalleeEdges.front()->ContextIds);
1687 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1693 NodeContextIds == CalleeEdgeContextIds);
1702 for (
const auto &
E :
Node->CalleeEdges)
1708template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1709void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1710 assignStackNodesPostOrder(ContextNode *Node,
1711 DenseSet<const ContextNode *> &Visited,
1712 DenseMap<uint64_t, std::vector<CallContextInfo>>
1713 &StackIdToMatchingCalls,
1714 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1715 const DenseSet<uint32_t> &ImportantContextIds) {
1723 auto CallerEdges =
Node->CallerEdges;
1724 for (
auto &
Edge : CallerEdges) {
1726 if (
Edge->isRemoved()) {
1730 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1731 CallToMatchingCall, ImportantContextIds);
1740 if (
Node->IsAllocation ||
1741 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1744 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1748 if (Calls.size() == 1) {
1749 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1750 if (Ids.size() == 1) {
1751 assert(SavedContextIds.empty());
1753 assert(Node == getNodeForStackId(Ids[0]));
1754 if (
Node->Recursive)
1757 NonAllocationCallToContextNodeMap[
Call] =
Node;
1759 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1767 uint64_t LastId =
Node->OrigStackOrAllocId;
1768 ContextNode *LastNode = getNodeForStackId(LastId);
1771 assert(LastNode == Node);
1773 ContextNode *LastNode =
Node;
1778 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1780 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1781 bool CreatedNode =
false;
1782 for (
unsigned I = 0;
I < Calls.size();
1783 I++, PrevIterCreatedNode = CreatedNode) {
1784 CreatedNode =
false;
1785 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1788 if (SavedContextIds.empty()) {
1795 auto MatchingCall = CallToMatchingCall[
Call];
1796 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1800 assert(
I > 0 && !PrevIterCreatedNode);
1803 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1808 assert(LastId == Ids.back());
1817 ContextNode *PrevNode = LastNode;
1821 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1823 ContextNode *CurNode = getNodeForStackId(Id);
1827 assert(!CurNode->Recursive);
1829 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1841 if (SavedContextIds.empty()) {
1850 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1851 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1853 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1855 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1861 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1866 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1871 for (
auto Id : Ids) {
1872 ContextNode *CurNode = getNodeForStackId(Id);
1879 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1886 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1887 if (PrevEdge->getContextIds().empty())
1888 removeEdgeFromGraph(PrevEdge);
1893 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1894 ? (uint8_t)AllocationType::None
1895 : CurNode->computeAllocType();
1899 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1903 for (
auto Id : Ids) {
1904 ContextNode *CurNode = getNodeForStackId(Id);
1913template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1914void CallsiteContextGraph<DerivedCCG, FuncTy,
1915 CallTy>::fixupImportantContexts() {
1916 if (ImportantContextIdInfo.empty())
1920 NumImportantContextIds = ImportantContextIdInfo.size();
1926 exportToDot(
"beforestackfixup");
1951 for (
auto &[CurContextId, Info] : ImportantContextIdInfo) {
1952 if (
Info.StackIdsToNode.empty())
1955 ContextNode *PrevNode =
nullptr;
1956 ContextNode *CurNode =
nullptr;
1957 DenseSet<const ContextEdge *> VisitedEdges;
1958 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1961 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1965 auto LenToEnd = AllStackIds.size() -
I;
1973 auto CheckStackIds = AllStackIds.slice(
I, Len);
1974 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1975 if (EntryIt ==
Info.StackIdsToNode.end())
1977 CurNode = EntryIt->second;
1994 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1997 if (CurEdge->getContextIds().insert(CurContextId).second) {
1998 NumFixupEdgeIdsInserted++;
2003 NumFixupEdgesAdded++;
2004 DenseSet<uint32_t> ContextIds({CurContextId});
2005 auto AllocType = computeAllocType(ContextIds);
2006 auto NewEdge = std::make_shared<ContextEdge>(
2007 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2008 PrevNode->CallerEdges.push_back(NewEdge);
2009 CurNode->CalleeEdges.push_back(NewEdge);
2011 CurEdge = NewEdge.get();
2014 VisitedEdges.
insert(CurEdge);
2017 for (
auto &
Edge : PrevNode->CallerEdges) {
2021 Edge->getContextIds().erase(CurContextId);
2029template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2030void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2038 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2039 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2040 for (
auto &
Call : CallsWithMetadata) {
2042 if (AllocationCallToContextNodeMap.count(
Call))
2044 auto StackIdsWithContextNodes =
2045 getStackIdsWithContextNodesForCall(
Call.call());
2048 if (StackIdsWithContextNodes.empty())
2052 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2053 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2063 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2067 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2068 for (
auto &It : StackIdToMatchingCalls) {
2069 auto &Calls = It.getSecond();
2071 if (Calls.size() == 1) {
2072 auto &Ids = Calls[0].StackIds;
2073 if (Ids.size() == 1)
2086 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2087 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2088 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2091 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2092 return A.StackIds.size() >
B.StackIds.size() ||
2093 (
A.StackIds.size() ==
B.StackIds.size() &&
2094 (
A.StackIds <
B.StackIds ||
2095 (
A.StackIds ==
B.StackIds &&
2096 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2102 uint64_t LastId = It.getFirst();
2103 ContextNode *LastNode = getNodeForStackId(LastId);
2107 if (LastNode->Recursive)
2112 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2120 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2123 for (
unsigned I = 0;
I < Calls.size();
I++) {
2124 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2125 assert(SavedContextIds.empty());
2126 assert(LastId == Ids.back());
2131 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2132 MatchingIdsFuncSet.
clear();
2139 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2141 ContextNode *PrevNode = LastNode;
2142 ContextNode *CurNode = LastNode;
2147 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2149 CurNode = getNodeForStackId(Id);
2153 if (CurNode->Recursive) {
2158 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2179 if (StackSequenceContextIds.
empty()) {
2192 if (Ids.back() != getLastStackId(
Call)) {
2193 for (
const auto &PE : LastNode->CallerEdges) {
2194 set_subtract(StackSequenceContextIds, PE->getContextIds());
2195 if (StackSequenceContextIds.
empty())
2199 if (StackSequenceContextIds.
empty())
2211 MatchingIdsFuncSet.
insert(Func);
2218 bool DuplicateContextIds =
false;
2219 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2220 auto &CallCtxInfo = Calls[J];
2221 auto &NextIds = CallCtxInfo.StackIds;
2224 auto *NextFunc = CallCtxInfo.Func;
2225 if (NextFunc != Func) {
2228 DuplicateContextIds =
true;
2231 auto &NextCall = CallCtxInfo.Call;
2232 CallToMatchingCall[NextCall] =
Call;
2243 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2244 StackSequenceContextIds.
size());
2247 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2248 : StackSequenceContextIds;
2249 assert(!SavedContextIds.empty());
2251 if (!DuplicateContextIds) {
2255 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2256 if (LastNodeContextIds.
empty())
2263 propagateDuplicateContextIds(OldToNewContextIds);
2273 DenseSet<const ContextNode *> Visited;
2275 ImportantContextIdInfo.keys());
2276 for (
auto &Entry : AllocationCallToContextNodeMap)
2277 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2278 CallToMatchingCall, ImportantContextIds);
2280 fixupImportantContexts();
2286uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2287 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2289 return CallsiteContext.
back();
2292uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2294 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2297 return Index.getStackIdAtIndex(CallsiteContext.
back());
2319 auto Pos =
F.getName().find_last_of(
'.');
2322 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2328std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2329 const Instruction *
Call,
2330 unsigned CloneNo)
const {
2336std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2337 const IndexCall &
Call,
2338 unsigned CloneNo)
const {
2339 auto VI = FSToVIMap.find(Func);
2340 assert(VI != FSToVIMap.end());
2343 return CallerName +
" -> alloc";
2346 return CallerName +
" -> " +
2348 Callsite->Clones[CloneNo]);
2352std::vector<uint64_t>
2353ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2354 Instruction *
Call) {
2355 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2357 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2361std::vector<uint64_t>
2362IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2364 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2366 return getStackIdsWithContextNodes<CallsiteInfo,
2367 SmallVector<unsigned>::const_iterator>(
2371template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2372template <
class NodeT,
class IteratorT>
2373std::vector<uint64_t>
2374CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2375 CallStack<NodeT, IteratorT> &CallsiteContext) {
2376 std::vector<uint64_t> StackIds;
2377 for (
auto IdOrIndex : CallsiteContext) {
2378 auto StackId = getStackId(IdOrIndex);
2379 ContextNode *
Node = getNodeForStackId(StackId);
2382 StackIds.push_back(StackId);
2387ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2389 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2390 :
Mod(
M), OREGetter(OREGetter) {
2394 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2396 std::vector<CallInfo> CallsWithMetadata;
2397 for (
auto &BB :
F) {
2398 for (
auto &
I : BB) {
2401 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2402 CallsWithMetadata.push_back(&
I);
2403 auto *AllocNode = addAllocNode(&
I, &
F);
2404 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2408 for (
auto &MDOp : MemProfMD->operands()) {
2410 std::vector<ContextTotalSize> ContextSizeInfo;
2412 if (MIBMD->getNumOperands() > 2) {
2413 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2414 MDNode *ContextSizePair =
2423 ContextSizeInfo.push_back({FullStackId, TotalSize});
2429 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2430 AllocNode, StackContext, CallsiteContext,
2432 TotalSizeToContextIdTopNCold);
2437 DotAllocContextIds = AllocNode->getContextIds();
2441 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2442 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2445 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2446 CallsWithMetadata.push_back(&
I);
2450 if (!CallsWithMetadata.empty())
2451 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2455 dbgs() <<
"CCG before updating call stack chains:\n";
2460 exportToDot(
"prestackupdate");
2465 exportToDot(
"poststackupdate");
2467 handleCallsitesWithMultipleTargets();
2472 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2473 for (
auto &
Call : FuncEntry.second)
2474 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2480IndexCallsiteContextGraph::findAliaseeGUIDsPrevailingInDifferentModule() {
2481 DenseSet<GlobalValue::GUID> AliaseeGUIDs;
2482 for (
auto &
I : Index) {
2484 for (
auto &S :
VI.getSummaryList()) {
2489 auto *AliaseeSummary = &AS->getAliasee();
2497 !isPrevailing(
VI.getGUID(), S.get()))
2502 auto AliaseeGUID = AS->getAliaseeGUID();
2504 if (!isPrevailing(AliaseeGUID, AliaseeSummary))
2505 AliaseeGUIDs.
insert(AliaseeGUID);
2508 AliaseesPrevailingInDiffModuleFromAlias += AliaseeGUIDs.
size();
2509 return AliaseeGUIDs;
2512IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2513 ModuleSummaryIndex &Index,
2523 findAliaseeGUIDsPrevailingInDifferentModule();
2527 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2528 for (
auto &
I : Index) {
2529 auto VI = Index.getValueInfo(
I);
2530 if (GUIDsToSkip.
contains(VI.getGUID()))
2532 for (
auto &S : VI.getSummaryList()) {
2541 !isPrevailing(VI.getGUID(), S.get()))
2546 std::vector<CallInfo> CallsWithMetadata;
2547 if (!
FS->allocs().empty()) {
2548 for (
auto &AN :
FS->mutableAllocs()) {
2553 if (AN.MIBs.empty())
2555 IndexCall AllocCall(&AN);
2556 CallsWithMetadata.push_back(AllocCall);
2557 auto *AllocNode = addAllocNode(AllocCall, FS);
2565 AN.ContextSizeInfos.size() == AN.MIBs.size());
2567 for (
auto &MIB : AN.MIBs) {
2570 std::vector<ContextTotalSize> ContextSizeInfo;
2571 if (!AN.ContextSizeInfos.empty()) {
2572 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2573 ContextSizeInfo.push_back({FullStackId, TotalSize});
2575 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2576 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2577 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2583 DotAllocContextIds = AllocNode->getContextIds();
2589 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2593 if (!
FS->callsites().empty())
2594 for (
auto &SN :
FS->mutableCallsites()) {
2595 IndexCall StackNodeCall(&SN);
2596 CallsWithMetadata.push_back(StackNodeCall);
2599 if (!CallsWithMetadata.empty())
2600 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2602 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2608 dbgs() <<
"CCG before updating call stack chains:\n";
2613 exportToDot(
"prestackupdate");
2618 exportToDot(
"poststackupdate");
2620 handleCallsitesWithMultipleTargets();
2625template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2626void CallsiteContextGraph<DerivedCCG, FuncTy,
2627 CallTy>::handleCallsitesWithMultipleTargets() {
2642 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2643 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2644 auto *
Node = Entry.second;
2653 std::vector<CallInfo> AllCalls;
2654 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2655 AllCalls.push_back(
Node->Call);
2669 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2672 auto It = AllCalls.begin();
2674 for (; It != AllCalls.end(); ++It) {
2677 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2680 if (!Edge->Callee->hasCall())
2682 assert(NodeToCallingFunc.count(Edge->Callee));
2684 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2693 if (
Node->Call != ThisCall) {
2694 Node->setCall(ThisCall);
2705 Node->MatchingCalls.clear();
2708 if (It == AllCalls.end()) {
2709 RemovedEdgesWithMismatchedCallees++;
2713 Node->setCall(CallInfo());
2718 for (++It; It != AllCalls.end(); ++It) {
2722 Node->MatchingCalls.push_back(ThisCall);
2731 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2732 return !it.second->hasCall() || it.second->Call != it.first;
2736 for (
auto &[
Call,
Node] : NewCallToNode)
2737 NonAllocationCallToContextNodeMap[
Call] =
Node;
2741 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2742 NonAllocationCallToContextNodeMap[
Call] =
Node;
2745template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2746bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2748 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2752 struct CallsWithSameCallee {
2753 std::vector<CallInfo> Calls;
2754 ContextNode *
Node =
nullptr;
2760 for (
auto ThisCall : AllCalls) {
2761 auto *
F = getCalleeFunc(
ThisCall.call());
2763 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2772 for (
const auto &Edge :
Node->CalleeEdges) {
2773 if (!Edge->Callee->hasCall())
2775 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2776 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2777 CalleeNodeToCallInfo[Edge->Callee] =
2778 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2784 if (CalleeNodeToCallInfo.
empty())
2796 ContextNode *UnmatchedCalleesNode =
nullptr;
2798 bool UsedOrigNode =
false;
2803 auto CalleeEdges =
Node->CalleeEdges;
2804 for (
auto &Edge : CalleeEdges) {
2805 if (!Edge->Callee->hasCall())
2810 ContextNode *CallerNodeToUse =
nullptr;
2814 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2815 if (!UnmatchedCalleesNode)
2816 UnmatchedCalleesNode =
2817 createNewNode(
false, NodeToCallingFunc[
Node]);
2818 CallerNodeToUse = UnmatchedCalleesNode;
2822 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2825 if (!UsedOrigNode) {
2828 Node->MatchingCalls.clear();
2829 UsedOrigNode =
true;
2832 createNewNode(
false, NodeToCallingFunc[
Node]);
2833 assert(!Info->Calls.empty());
2836 Info->Node->setCall(Info->Calls.front());
2842 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2844 CallerNodeToUse = Info->Node;
2848 if (CallerNodeToUse ==
Node)
2851 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2858 for (
auto &
I : CalleeNodeToCallInfo)
2859 removeNoneTypeCallerEdges(
I.second->Node);
2860 if (UnmatchedCalleesNode)
2861 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2862 removeNoneTypeCallerEdges(
Node);
2872uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2875 return Index.getStackIdAtIndex(IdOrIndex);
2878template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2879bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2880 CallTy
Call, EdgeIter &EI,
2881 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2883 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2884 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2887 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2888 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2893 if (FoundCalleeChain.empty())
2897 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2901 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2902 CurEdge->AllocTypes |=
Edge->AllocTypes;
2907 auto NewEdge = std::make_shared<ContextEdge>(
2908 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2909 Callee->CallerEdges.push_back(NewEdge);
2910 if (Caller ==
Edge->Caller) {
2914 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2917 "Iterator position not restored after insert and increment");
2919 Caller->CalleeEdges.push_back(NewEdge);
2924 auto *CurCalleeNode =
Edge->Callee;
2925 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2926 ContextNode *NewNode =
nullptr;
2928 if (TailCallToContextNodeMap.
count(NewCall)) {
2929 NewNode = TailCallToContextNodeMap[NewCall];
2930 NewNode->AllocTypes |=
Edge->AllocTypes;
2932 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2934 NewNode = createNewNode(
false, Func, NewCall);
2935 TailCallToContextNodeMap[NewCall] = NewNode;
2936 NewNode->AllocTypes =
Edge->AllocTypes;
2940 AddEdge(NewNode, CurCalleeNode);
2942 CurCalleeNode = NewNode;
2946 AddEdge(
Edge->Caller, CurCalleeNode);
2954 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2966bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2967 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2968 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2969 bool &FoundMultipleCalleeChains) {
2976 FoundCalleeChain.push_back({Callsite,
F});
2991 bool FoundSingleCalleeChain =
false;
2992 for (
auto &BB : *CalleeFunc) {
2993 for (
auto &
I : BB) {
2995 if (!CB || !CB->isTailCall())
2997 auto *CalledValue = CB->getCalledOperand();
2998 auto *CalledFunction = CB->getCalledFunction();
2999 if (CalledValue && !CalledFunction) {
3000 CalledValue = CalledValue->stripPointerCasts();
3007 assert(!CalledFunction &&
3008 "Expected null called function in callsite for alias");
3011 if (!CalledFunction)
3013 if (CalledFunction == ProfiledCallee) {
3014 if (FoundSingleCalleeChain) {
3015 FoundMultipleCalleeChains =
true;
3018 FoundSingleCalleeChain =
true;
3019 FoundProfiledCalleeCount++;
3020 FoundProfiledCalleeDepth +=
Depth;
3021 if (
Depth > FoundProfiledCalleeMaxDepth)
3022 FoundProfiledCalleeMaxDepth =
Depth;
3023 SaveCallsiteInfo(&
I, CalleeFunc);
3024 }
else if (findProfiledCalleeThroughTailCalls(
3025 ProfiledCallee, CalledFunction,
Depth + 1,
3026 FoundCalleeChain, FoundMultipleCalleeChains)) {
3029 assert(!FoundMultipleCalleeChains);
3030 if (FoundSingleCalleeChain) {
3031 FoundMultipleCalleeChains =
true;
3034 FoundSingleCalleeChain =
true;
3035 SaveCallsiteInfo(&
I, CalleeFunc);
3036 }
else if (FoundMultipleCalleeChains)
3041 return FoundSingleCalleeChain;
3044const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
3046 if (!CB->getCalledOperand() || CB->isIndirectCall())
3048 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3055bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3056 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3057 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3059 if (!CB->getCalledOperand() || CB->isIndirectCall())
3061 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3063 if (CalleeFunc == Func)
3066 if (Alias && Alias->getAliasee() == Func)
3077 bool FoundMultipleCalleeChains =
false;
3078 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3080 FoundMultipleCalleeChains)) {
3081 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3082 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3083 <<
" that actually called " << CalleeVal->getName()
3084 << (FoundMultipleCalleeChains
3085 ?
" (found multiple possible chains)"
3088 if (FoundMultipleCalleeChains)
3089 FoundProfiledCalleeNonUniquelyCount++;
3096bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3097 Instruction *Call2) {
3099 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3101 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3104 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3106 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3108 return CalleeFunc1 == CalleeFunc2;
3111bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3112 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3113 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3114 bool &FoundMultipleCalleeChains) {
3120 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3123 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3124 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3127 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3128 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3129 CallsiteInfo *NewCallsiteInfo =
3130 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3131 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3138 bool FoundSingleCalleeChain =
false;
3141 !isPrevailing(CurCallee.
getGUID(), S.get()))
3146 auto FSVI = CurCallee;
3149 FSVI = AS->getAliaseeVI();
3150 for (
auto &CallEdge :
FS->calls()) {
3151 if (!CallEdge.second.hasTailCall())
3153 if (CallEdge.first == ProfiledCallee) {
3154 if (FoundSingleCalleeChain) {
3155 FoundMultipleCalleeChains =
true;
3158 FoundSingleCalleeChain =
true;
3159 FoundProfiledCalleeCount++;
3160 FoundProfiledCalleeDepth +=
Depth;
3161 if (
Depth > FoundProfiledCalleeMaxDepth)
3162 FoundProfiledCalleeMaxDepth =
Depth;
3163 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3165 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3166 FSToVIMap[
FS] = FSVI;
3167 }
else if (findProfiledCalleeThroughTailCalls(
3168 ProfiledCallee, CallEdge.first,
Depth + 1,
3169 FoundCalleeChain, FoundMultipleCalleeChains)) {
3172 assert(!FoundMultipleCalleeChains);
3173 if (FoundSingleCalleeChain) {
3174 FoundMultipleCalleeChains =
true;
3177 FoundSingleCalleeChain =
true;
3178 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3180 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3181 FSToVIMap[
FS] = FSVI;
3182 }
else if (FoundMultipleCalleeChains)
3187 return FoundSingleCalleeChain;
3190const FunctionSummary *
3191IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3193 if (
Callee.getSummaryList().empty())
3198bool IndexCallsiteContextGraph::calleeMatchesFunc(
3199 IndexCall &
Call,
const FunctionSummary *Func,
3200 const FunctionSummary *CallerFunc,
3201 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3205 AliasSummary *Alias =
3206 Callee.getSummaryList().empty()
3209 assert(FSToVIMap.count(Func));
3210 auto FuncVI = FSToVIMap[
Func];
3211 if (Callee == FuncVI ||
3226 bool FoundMultipleCalleeChains =
false;
3227 if (!findProfiledCalleeThroughTailCalls(
3228 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3229 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3230 <<
" from " << FSToVIMap[CallerFunc]
3231 <<
" that actually called " << Callee
3232 << (FoundMultipleCalleeChains
3233 ?
" (found multiple possible chains)"
3236 if (FoundMultipleCalleeChains)
3237 FoundProfiledCalleeNonUniquelyCount++;
3244bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3247 return Callee1 == Callee2;
3250template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3251void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3257template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3258void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3259 raw_ostream &OS)
const {
3260 OS <<
"Node " <<
this <<
"\n";
3264 OS <<
" (recursive)";
3266 if (!MatchingCalls.empty()) {
3267 OS <<
"\tMatchingCalls:\n";
3268 for (
auto &MatchingCall : MatchingCalls) {
3270 MatchingCall.print(OS);
3274 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3276 OS <<
"\tContextIds:";
3278 auto ContextIds = getContextIds();
3279 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3280 std::sort(SortedIds.begin(), SortedIds.end());
3281 for (
auto Id : SortedIds)
3284 OS <<
"\tCalleeEdges:\n";
3285 for (
auto &
Edge : CalleeEdges)
3286 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3288 OS <<
"\tCallerEdges:\n";
3289 for (
auto &
Edge : CallerEdges)
3290 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3292 if (!Clones.empty()) {
3295 for (
auto *
C : Clones)
3296 OS <<
LS <<
C <<
" NodeId: " <<
C->NodeId;
3298 }
else if (CloneOf) {
3299 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3303template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3304void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3310template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3311void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3312 raw_ostream &OS)
const {
3313 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3314 << (IsBackedge ?
" (BE)" :
"")
3316 OS <<
" ContextIds:";
3317 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3318 std::sort(SortedIds.begin(), SortedIds.end());
3319 for (
auto Id : SortedIds)
3323template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3324void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3328template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3329void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3330 raw_ostream &OS)
const {
3331 OS <<
"Callsite Context Graph:\n";
3332 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3334 if (
Node->isRemoved())
3341template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3342void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3344 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark)
const {
3345 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3347 if (
Node->isRemoved())
3349 if (!
Node->IsAllocation)
3351 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3352 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3353 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3354 std::sort(SortedIds.begin(), SortedIds.end());
3355 for (
auto Id : SortedIds) {
3356 auto TypeI = ContextIdToAllocationType.find(Id);
3357 assert(TypeI != ContextIdToAllocationType.end());
3358 auto CSI = ContextIdToContextSizeInfos.find(Id);
3359 if (CSI != ContextIdToContextSizeInfos.end()) {
3360 for (
auto &Info : CSI->second) {
3363 " full allocation context " + std::to_string(
Info.FullStackId) +
3364 " with total size " + std::to_string(
Info.TotalSize) +
" is " +
3366 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3368 " due to cold byte percent";
3370 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3374 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
3382 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3384 " due to cold byte percent";
3386 Msg +=
" (internal context id " + std::to_string(Id) +
")";
3390 EmitRemark(
DEBUG_TYPE,
"MemProfReport", Msg);
3396template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3397void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3398 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3401 for (
auto &
Edge :
Node->CallerEdges)
3406template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3408 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3409 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3411 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3427 return G->NodeOwner.begin()->get();
3430 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3431 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3450template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3464 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3470 std::string LabelString =
3471 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3474 LabelString +=
"\n";
3475 if (
Node->hasCall()) {
3476 auto Func =
G->NodeToCallingFunc.find(
Node);
3477 assert(Func !=
G->NodeToCallingFunc.end());
3479 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3480 for (
auto &MatchingCall :
Node->MatchingCalls) {
3481 LabelString +=
"\n";
3482 LabelString +=
G->getLabel(Func->second, MatchingCall.call(),
3483 MatchingCall.cloneNo());
3486 LabelString +=
"null call";
3487 if (
Node->Recursive)
3488 LabelString +=
" (recursive)";
3490 LabelString +=
" (external)";
3496 auto ContextIds =
Node->getContextIds();
3500 bool Highlight =
false;
3509 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3510 getContextIds(ContextIds) +
"\"")
3514 AttributeString +=
",fontsize=\"30\"";
3516 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3518 if (
Node->CloneOf) {
3519 AttributeString +=
",color=\"blue\"";
3520 AttributeString +=
",style=\"filled,bold,dashed\"";
3522 AttributeString +=
",style=\"filled\"";
3523 return AttributeString;
3528 auto &Edge = *(ChildIter.getCurrent());
3533 bool Highlight =
false;
3542 auto Color = getColor(Edge->AllocTypes, Highlight);
3543 std::string AttributeString =
3544 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3546 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3549 if (Edge->IsBackedge)
3550 AttributeString +=
",style=\"dotted\"";
3553 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3554 return AttributeString;
3560 if (
Node->isRemoved())
3573 std::string IdString =
"ContextIds:";
3574 if (ContextIds.
size() < 100) {
3575 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3576 std::sort(SortedIds.begin(), SortedIds.end());
3577 for (
auto Id : SortedIds)
3578 IdString += (
" " +
Twine(Id)).str();
3580 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3585 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3591 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3593 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3594 if (AllocTypes == (uint8_t)AllocationType::Cold)
3595 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3597 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3598 return Highlight ?
"magenta" :
"mediumorchid1";
3602 static std::string getNodeId(NodeRef Node) {
3603 std::stringstream SStream;
3604 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3605 std::string
Result = SStream.str();
3614template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3619template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3620void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3621 std::string Label)
const {
3626template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3627typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3628CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3629 const std::shared_ptr<ContextEdge> &
Edge,
3630 DenseSet<uint32_t> ContextIdsToMove) {
3632 assert(NodeToCallingFunc.count(Node));
3633 ContextNode *Clone =
3634 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3635 Node->addClone(Clone);
3636 Clone->MatchingCalls =
Node->MatchingCalls;
3637 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3642template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3643void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3644 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3645 ContextNode *NewCallee,
bool NewClone,
3646 DenseSet<uint32_t> ContextIdsToMove) {
3649 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3651 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3653 ContextNode *OldCallee =
Edge->Callee;
3657 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3661 if (ContextIdsToMove.
empty())
3662 ContextIdsToMove =
Edge->getContextIds();
3666 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3669 NewCallee->AllocTypes |=
Edge->AllocTypes;
3671 if (ExistingEdgeToNewCallee) {
3674 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3675 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3676 assert(
Edge->ContextIds == ContextIdsToMove);
3677 removeEdgeFromGraph(
Edge.get());
3680 Edge->Callee = NewCallee;
3681 NewCallee->CallerEdges.push_back(
Edge);
3683 OldCallee->eraseCallerEdge(
Edge.get());
3690 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3691 if (ExistingEdgeToNewCallee) {
3694 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3695 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3698 auto NewEdge = std::make_shared<ContextEdge>(
3699 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3700 Edge->Caller->CalleeEdges.push_back(NewEdge);
3701 NewCallee->CallerEdges.push_back(NewEdge);
3705 NewCallee->AllocTypes |= CallerEdgeAllocType;
3707 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3712 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3713 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3717 if (CalleeToUse == OldCallee) {
3721 if (EdgeIsRecursive) {
3725 CalleeToUse = NewCallee;
3729 DenseSet<uint32_t> EdgeContextIdsToMove =
3731 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3732 OldCalleeEdge->AllocTypes =
3733 computeAllocType(OldCalleeEdge->getContextIds());
3740 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3741 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3742 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3746 auto NewEdge = std::make_shared<ContextEdge>(
3747 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3748 EdgeContextIdsToMove);
3749 NewCallee->CalleeEdges.push_back(NewEdge);
3750 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3754 OldCallee->AllocTypes = OldCallee->computeAllocType();
3756 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3757 OldCallee->emptyContextIds());
3761 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3764 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3770template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3771void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3772 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3773 ContextNode *NewCaller) {
3774 auto *OldCallee =
Edge->Callee;
3775 auto *NewCallee = OldCallee;
3778 bool Recursive =
Edge->Caller ==
Edge->Callee;
3780 NewCallee = NewCaller;
3782 ContextNode *OldCaller =
Edge->Caller;
3783 OldCaller->eraseCalleeEdge(
Edge.get());
3787 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3789 if (ExistingEdgeToNewCaller) {
3792 ExistingEdgeToNewCaller->getContextIds().insert_range(
3793 Edge->getContextIds());
3794 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3795 Edge->ContextIds.clear();
3796 Edge->AllocTypes = (uint8_t)AllocationType::None;
3797 OldCallee->eraseCallerEdge(
Edge.get());
3800 Edge->Caller = NewCaller;
3801 NewCaller->CalleeEdges.push_back(
Edge);
3803 assert(NewCallee == NewCaller);
3806 Edge->Callee = NewCallee;
3807 NewCallee->CallerEdges.push_back(
Edge);
3808 OldCallee->eraseCallerEdge(
Edge.get());
3814 NewCaller->AllocTypes |=
Edge->AllocTypes;
3821 bool IsNewNode = NewCaller->CallerEdges.empty();
3830 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3831 auto OldCallerCaller = OldCallerEdge->Caller;
3835 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3836 if (OldCaller == OldCallerCaller) {
3837 OldCallerCaller = NewCaller;
3843 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3844 OldCallerEdge->AllocTypes =
3845 computeAllocType(OldCallerEdge->getContextIds());
3850 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3854 if (ExistingCallerEdge) {
3855 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3856 ExistingCallerEdge->AllocTypes |=
3857 computeAllocType(EdgeContextIdsToMove);
3860 auto NewEdge = std::make_shared<ContextEdge>(
3861 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3862 EdgeContextIdsToMove);
3863 NewCaller->CallerEdges.push_back(NewEdge);
3864 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3869 OldCaller->AllocTypes = OldCaller->computeAllocType();
3871 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3872 OldCaller->emptyContextIds());
3876 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3879 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3885template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3886void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3887 recursivelyRemoveNoneTypeCalleeEdges(
3888 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3893 removeNoneTypeCalleeEdges(Node);
3895 for (
auto *Clone :
Node->Clones)
3896 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3900 auto CallerEdges =
Node->CallerEdges;
3901 for (
auto &
Edge : CallerEdges) {
3903 if (
Edge->isRemoved()) {
3907 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3912template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3913void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3918 DenseSet<const ContextNode *> Visited;
3919 DenseSet<const ContextNode *> CurrentStack;
3920 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3922 if (
Node->isRemoved())
3925 if (!
Node->CallerEdges.empty())
3927 markBackedges(Node, Visited, CurrentStack);
3933template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3934void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3935 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3936 DenseSet<const ContextNode *> &CurrentStack) {
3937 auto I = Visited.
insert(Node);
3941 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3942 auto *
Callee = CalleeEdge->Callee;
3943 if (Visited.
count(Callee)) {
3946 if (CurrentStack.
count(Callee))
3947 CalleeEdge->IsBackedge =
true;
3950 CurrentStack.
insert(Callee);
3951 markBackedges(Callee, Visited, CurrentStack);
3952 CurrentStack.
erase(Callee);
3956template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3957void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3958 DenseSet<const ContextNode *> Visited;
3959 for (
auto &Entry : AllocationCallToContextNodeMap) {
3961 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3964 for (
auto &Entry : AllocationCallToContextNodeMap)
3965 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3978template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3979void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3980 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3981 const DenseSet<uint32_t> &AllocContextIds) {
3991 if (!
Node->hasCall())
4010 auto CallerEdges =
Node->CallerEdges;
4011 for (
auto &
Edge : CallerEdges) {
4013 if (
Edge->isRemoved()) {
4019 if (
Edge->IsBackedge) {
4026 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
4027 identifyClones(
Edge->Caller, Visited, AllocContextIds);
4050 const unsigned AllocTypeCloningPriority[] = { 3, 4,
4054 [&](
const std::shared_ptr<ContextEdge> &
A,
4055 const std::shared_ptr<ContextEdge> &
B) {
4058 if (A->ContextIds.empty())
4064 if (B->ContextIds.empty())
4067 if (A->AllocTypes == B->AllocTypes)
4070 return *A->ContextIds.begin() < *B->ContextIds.begin();
4071 return AllocTypeCloningPriority[A->AllocTypes] <
4072 AllocTypeCloningPriority[B->AllocTypes];
4075 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4077 DenseSet<uint32_t> RecursiveContextIds;
4082 DenseSet<uint32_t> AllCallerContextIds;
4083 for (
auto &CE :
Node->CallerEdges) {
4086 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4087 for (
auto Id :
CE->getContextIds())
4088 if (!AllCallerContextIds.
insert(Id).second)
4089 RecursiveContextIds.
insert(Id);
4099 auto CallerEdges =
Node->CallerEdges;
4100 for (
auto &CallerEdge : CallerEdges) {
4102 if (CallerEdge->isRemoved()) {
4106 assert(CallerEdge->Callee == Node);
4115 if (!CallerEdge->Caller->hasCall())
4120 auto CallerEdgeContextsForAlloc =
4122 if (!RecursiveContextIds.
empty())
4123 CallerEdgeContextsForAlloc =
4125 if (CallerEdgeContextsForAlloc.empty())
4128 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4132 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4133 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4134 for (
auto &CalleeEdge :
Node->CalleeEdges)
4135 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4136 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4152 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4153 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4154 if (!CallerEdge->IsBackedge &&
4155 allocTypeToUse(CallerAllocTypeForAlloc) ==
4156 allocTypeToUse(
Node->AllocTypes) &&
4157 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4158 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4162 if (CallerEdge->IsBackedge) {
4166 DeferredBackedges++;
4179 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4180 !Visited.
count(CallerEdge->Caller)) {
4181 const auto OrigIdCount = CallerEdge->getContextIds().size();
4184 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4185 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4189 bool UpdatedEdge =
false;
4190 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4191 for (
auto E :
Node->CallerEdges) {
4193 if (
E->Caller->CloneOf != CallerEdge->Caller)
4197 auto CallerEdgeContextsForAllocNew =
4199 if (CallerEdgeContextsForAllocNew.empty())
4209 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4219 if (CallerEdge->isRemoved())
4229 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4230 if (CallerEdgeContextsForAlloc.empty())
4235 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4236 CalleeEdgeAllocTypesForCallerEdge.clear();
4237 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4238 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4239 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4245 ContextNode *Clone =
nullptr;
4246 for (
auto *CurClone :
Node->Clones) {
4247 if (allocTypeToUse(CurClone->AllocTypes) !=
4248 allocTypeToUse(CallerAllocTypeForAlloc))
4255 assert(!BothSingleAlloc ||
4256 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4262 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4263 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4271 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4272 CallerEdgeContextsForAlloc);
4274 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4277 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4284 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4290void ModuleCallsiteContextGraph::updateAllocationCall(
4295 "memprof", AllocTypeString);
4298 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4299 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4301 <<
" marked with memprof allocation attribute "
4302 <<
ore::NV(
"Attribute", AllocTypeString));
4305void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4309 assert(AI->Versions.size() >
Call.cloneNo());
4314ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4316 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4317 return AllocationType::None;
4318 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4319 ? AllocationType::Cold
4320 : AllocationType::NotCold;
4324IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4326 assert(AI->Versions.size() >
Call.cloneNo());
4330void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4331 FuncInfo CalleeFunc) {
4332 auto *CurF = getCalleeFunc(CallerCall.call());
4333 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4340 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4342 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4344 MismatchedCloneAssignments++;
4347 if (NewCalleeCloneNo > 0)
4348 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4349 OREGetter(CallerCall.call()->getFunction())
4350 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4351 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4352 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4353 <<
" assigned to call function clone "
4354 <<
ore::NV(
"Callee", CalleeFunc.func()));
4357void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4358 FuncInfo CalleeFunc) {
4361 "Caller cannot be an allocation which should not have profiled calls");
4362 assert(CI->Clones.size() > CallerCall.cloneNo());
4363 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4364 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4369 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4371 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4373 MismatchedCloneAssignments++;
4375 CurCalleeCloneNo = NewCalleeCloneNo;
4387 SP->replaceLinkageName(MDName);
4391 TempDISubprogram NewDecl = Decl->
clone();
4392 NewDecl->replaceLinkageName(MDName);
4396CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4398ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4399 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4400 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4405 assert(!
Func.func()->getParent()->getFunction(Name));
4406 NewFunc->setName(Name);
4408 for (
auto &Inst : CallsWithMetadataInFunc) {
4410 assert(Inst.cloneNo() == 0);
4413 OREGetter(
Func.func())
4414 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4415 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4416 return {NewFunc, CloneNo};
4419CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4420 IndexCall>::FuncInfo
4421IndexCallsiteContextGraph::cloneFunctionForCallsite(
4422 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4423 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4437 for (
auto &Inst : CallsWithMetadataInFunc) {
4439 assert(Inst.cloneNo() == 0);
4441 assert(AI->Versions.size() == CloneNo);
4444 AI->Versions.push_back(0);
4447 assert(CI && CI->Clones.size() == CloneNo);
4450 CI->Clones.push_back(0);
4452 CallMap[Inst] = {Inst.call(), CloneNo};
4454 return {
Func.func(), CloneNo};
4471template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4472void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4478 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4479 for (
auto &Entry : AllocationCallToContextNodeMap) {
4481 for (
auto Id :
Node->getContextIds())
4482 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4483 for (
auto *Clone :
Node->Clones) {
4484 for (
auto Id : Clone->getContextIds())
4485 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4492 DenseSet<const ContextNode *> Visited;
4493 for (
auto &Entry : AllocationCallToContextNodeMap) {
4496 mergeClones(Node, Visited, ContextIdToAllocationNode);
4502 auto Clones =
Node->Clones;
4503 for (
auto *Clone : Clones)
4504 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4508 dbgs() <<
"CCG after merging:\n";
4512 exportToDot(
"aftermerge");
4520template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4521void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4522 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4523 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4533 bool FoundUnvisited =
true;
4535 while (FoundUnvisited) {
4537 FoundUnvisited =
false;
4540 auto CallerEdges =
Node->CallerEdges;
4541 for (
auto CallerEdge : CallerEdges) {
4543 if (CallerEdge->Callee != Node)
4548 FoundUnvisited =
true;
4549 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4553 TotalMergeInvokes++;
4554 TotalMergeIters += Iters;
4555 if (Iters > MaxMergeIters)
4556 MaxMergeIters = Iters;
4559 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4562template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4563void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4564 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4565 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4567 if (
Node->emptyContextIds())
4572 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4573 OrigNodeToCloneEdges;
4574 for (
const auto &
E :
Node->CalleeEdges) {
4579 OrigNodeToCloneEdges[
Base].push_back(
E);
4585 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4586 const std::shared_ptr<ContextEdge> &
B) {
4587 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4588 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4589 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4591 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4595 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4600 for (
auto Entry : OrigNodeToCloneEdges) {
4603 auto &CalleeEdges =
Entry.second;
4604 auto NumCalleeClones = CalleeEdges.size();
4606 if (NumCalleeClones == 1)
4617 DenseSet<ContextNode *> OtherCallersToShareMerge;
4618 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4619 OtherCallersToShareMerge);
4624 ContextNode *MergeNode =
nullptr;
4625 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4626 for (
auto CalleeEdge : CalleeEdges) {
4627 auto *OrigCallee = CalleeEdge->Callee;
4633 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4634 MergeNode = OrigCallee;
4635 NonNewMergedNodes++;
4642 if (!OtherCallersToShareMerge.
empty()) {
4643 bool MoveAllCallerEdges =
true;
4644 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4645 if (CalleeCallerE == CalleeEdge)
4647 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4648 MoveAllCallerEdges =
false;
4654 if (MoveAllCallerEdges) {
4655 MergeNode = OrigCallee;
4656 NonNewMergedNodes++;
4663 assert(MergeNode != OrigCallee);
4664 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4667 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4672 if (!OtherCallersToShareMerge.
empty()) {
4676 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4677 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4678 if (CalleeCallerE == CalleeEdge)
4680 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4682 CallerToMoveCount[CalleeCallerE->Caller]++;
4683 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4687 removeNoneTypeCalleeEdges(OrigCallee);
4688 removeNoneTypeCalleeEdges(MergeNode);
4706template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4707void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4708 findOtherCallersToShareMerge(
4710 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4711 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4712 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4713 auto NumCalleeClones = CalleeEdges.size();
4716 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4719 unsigned PossibleOtherCallerNodes = 0;
4723 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4729 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4730 for (
auto CalleeEdge : CalleeEdges) {
4731 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4734 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4735 if (CalleeCallerEdges->Caller == Node) {
4736 assert(CalleeCallerEdges == CalleeEdge);
4739 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4742 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4744 PossibleOtherCallerNodes++;
4748 for (
auto Id : CalleeEdge->getContextIds()) {
4749 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4753 MissingAllocForContextId++;
4756 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4763 for (
auto CalleeEdge : CalleeEdges) {
4764 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4766 if (!PossibleOtherCallerNodes)
4768 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4770 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4772 if (CalleeCallerE == CalleeEdge)
4776 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4781 for (
auto Id : CalleeCallerE->getContextIds()) {
4782 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4787 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4788 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4789 PossibleOtherCallerNodes--;
4796 if (!PossibleOtherCallerNodes)
4801 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4802 if (
Count != NumCalleeClones)
4804 OtherCallersToShareMerge.
insert(OtherCaller);
4849template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4850bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4857 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4861 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4862 const FuncInfo &CalleeFunc) {
4864 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4868 struct FuncCloneInfo {
4873 DenseMap<CallInfo, CallInfo> CallMap;
4901 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4902 UnassignedCallClones;
4906 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4907 FuncInfo OrigFunc(Func);
4912 std::vector<FuncCloneInfo> FuncCloneInfos;
4913 for (
auto &
Call : CallsWithMetadata) {
4914 ContextNode *
Node = getNodeForInst(
Call);
4918 if (!Node ||
Node->Clones.empty())
4921 "Not having a call should have prevented cloning");
4925 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4929 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4931 ContextNode *CallsiteClone,
4934 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4936 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4937 DenseMap<CallInfo, CallInfo> &CallMap =
4938 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4939 CallInfo CallClone(
Call);
4940 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4941 CallClone = It->second;
4942 CallsiteClone->setCall(CallClone);
4944 for (
auto &MatchingCall :
Node->MatchingCalls) {
4945 CallInfo CallClone(MatchingCall);
4946 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4947 CallClone = It->second;
4949 MatchingCall = CallClone;
4957 auto MoveEdgeToNewCalleeCloneAndSetUp =
4958 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4959 ContextNode *OrigCallee =
Edge->Callee;
4960 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4961 removeNoneTypeCalleeEdges(NewClone);
4962 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4966 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4967 RecordCalleeFuncOfCallsite(
4968 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4975 std::deque<ContextNode *> ClonesWorklist;
4977 if (!
Node->emptyContextIds())
4978 ClonesWorklist.push_back(Node);
4984 unsigned NodeCloneCount = 0;
4985 while (!ClonesWorklist.empty()) {
4986 ContextNode *Clone = ClonesWorklist.front();
4987 ClonesWorklist.pop_front();
4996 if (FuncCloneInfos.size() < NodeCloneCount) {
4998 if (NodeCloneCount == 1) {
5003 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5004 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5008 FuncCloneInfos.push_back(
5009 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
5010 AssignCallsiteCloneToFuncClone(
5011 OrigFunc,
Call, Clone,
5012 AllocationCallToContextNodeMap.count(
Call));
5013 for (
auto &CE : Clone->CallerEdges) {
5015 if (!
CE->Caller->hasCall())
5017 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
5027 FuncInfo PreviousAssignedFuncClone;
5029 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5030 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5032 bool CallerAssignedToCloneOfFunc =
false;
5033 if (EI != Clone->CallerEdges.end()) {
5034 const std::shared_ptr<ContextEdge> &
Edge = *EI;
5035 PreviousAssignedFuncClone =
5036 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5037 CallerAssignedToCloneOfFunc =
true;
5042 DenseMap<CallInfo, CallInfo> NewCallMap;
5043 unsigned CloneNo = FuncCloneInfos.size();
5044 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
5045 "should already exist in the map");
5046 FuncInfo NewFuncClone = cloneFunctionForCallsite(
5047 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
5048 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
5049 FunctionClonesAnalysis++;
5055 if (!CallerAssignedToCloneOfFunc) {
5056 AssignCallsiteCloneToFuncClone(
5057 NewFuncClone,
Call, Clone,
5058 AllocationCallToContextNodeMap.count(
Call));
5059 for (
auto &CE : Clone->CallerEdges) {
5061 if (!
CE->Caller->hasCall())
5063 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5075 auto CallerEdges = Clone->CallerEdges;
5076 for (
auto CE : CallerEdges) {
5078 if (
CE->isRemoved()) {
5084 if (!
CE->Caller->hasCall())
5087 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5091 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5092 PreviousAssignedFuncClone)
5095 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5108 auto CalleeEdges =
CE->Caller->CalleeEdges;
5109 for (
auto CalleeEdge : CalleeEdges) {
5112 if (CalleeEdge->isRemoved()) {
5117 ContextNode *
Callee = CalleeEdge->Callee;
5121 if (Callee == Clone || !
Callee->hasCall())
5126 if (Callee == CalleeEdge->Caller)
5128 ContextNode *NewClone =
5129 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5132 removeNoneTypeCalleeEdges(Callee);
5140 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5141 OrigCall.setCloneNo(0);
5142 DenseMap<CallInfo, CallInfo> &CallMap =
5143 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5145 CallInfo NewCall(CallMap[OrigCall]);
5147 NewClone->setCall(NewCall);
5149 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5150 CallInfo OrigMatchingCall(MatchingCall);
5151 OrigMatchingCall.setCloneNo(0);
5153 CallInfo NewCall(CallMap[OrigMatchingCall]);
5156 MatchingCall = NewCall;
5165 auto FindFirstAvailFuncClone = [&]() {
5170 for (
auto &CF : FuncCloneInfos) {
5171 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5172 return CF.FuncClone;
5175 "Expected an available func clone for this callsite clone");
5192 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5193 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5197 auto CloneCallerEdges = Clone->CallerEdges;
5198 for (
auto &
Edge : CloneCallerEdges) {
5202 if (
Edge->isRemoved())
5205 if (!
Edge->Caller->hasCall())
5209 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5210 FuncInfo FuncCloneCalledByCaller =
5211 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5221 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5222 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5230 (FuncCloneAssignedToCurCallsiteClone &&
5231 FuncCloneAssignedToCurCallsiteClone !=
5232 FuncCloneCalledByCaller)) {
5247 if (FuncCloneToNewCallsiteCloneMap.count(
5248 FuncCloneCalledByCaller)) {
5249 ContextNode *NewClone =
5250 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5251 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5253 removeNoneTypeCalleeEdges(NewClone);
5256 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5257 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5260 ClonesWorklist.push_back(NewClone);
5264 removeNoneTypeCalleeEdges(Clone);
5272 if (!FuncCloneAssignedToCurCallsiteClone) {
5273 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5275 AssignCallsiteCloneToFuncClone(
5276 FuncCloneCalledByCaller,
Call, Clone,
5277 AllocationCallToContextNodeMap.count(
Call));
5281 assert(FuncCloneAssignedToCurCallsiteClone ==
5282 FuncCloneCalledByCaller);
5291 if (!FuncCloneAssignedToCurCallsiteClone) {
5292 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5293 assert(FuncCloneAssignedToCurCallsiteClone);
5295 AssignCallsiteCloneToFuncClone(
5296 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5297 AllocationCallToContextNodeMap.count(
Call));
5299 assert(FuncCloneToCurNodeCloneMap
5300 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5302 RecordCalleeFuncOfCallsite(
Edge->Caller,
5303 FuncCloneAssignedToCurCallsiteClone);
5323 if (!FuncCloneAssignedToCurCallsiteClone) {
5324 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5325 assert(FuncCloneAssignedToCurCallsiteClone &&
5326 "No available func clone for this callsite clone");
5327 AssignCallsiteCloneToFuncClone(
5328 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5329 AllocationCallToContextNodeMap.contains(
Call));
5334 for (
const auto &PE :
Node->CalleeEdges)
5336 for (
const auto &CE :
Node->CallerEdges)
5338 for (
auto *Clone :
Node->Clones) {
5340 for (
const auto &PE : Clone->CalleeEdges)
5342 for (
const auto &CE : Clone->CallerEdges)
5348 if (FuncCloneInfos.size() < 2)
5354 for (
auto &
Call : CallsWithMetadata) {
5355 ContextNode *
Node = getNodeForInst(
Call);
5356 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5362 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5366 DenseSet<unsigned> NodeCallClones;
5367 for (
auto *
C :
Node->Clones)
5368 NodeCallClones.
insert(
C->Call.cloneNo());
5371 for (
auto &FC : FuncCloneInfos) {
5376 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5381 auto &CallVector = UnassignedCallClones[
Node][
I];
5382 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5383 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5384 CallInfo CallClone = It->second;
5385 CallVector.push_back(CallClone);
5389 assert(
false &&
"Expected to find call in CallMap");
5392 for (
auto &MatchingCall :
Node->MatchingCalls) {
5393 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5394 CallInfo CallClone = It->second;
5395 CallVector.push_back(CallClone);
5399 assert(
false &&
"Expected to find call in CallMap");
5407 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5409 auto UpdateCalls = [&](ContextNode *
Node,
5410 DenseSet<const ContextNode *> &Visited,
5411 auto &&UpdateCalls) {
5412 auto Inserted = Visited.insert(Node);
5416 for (
auto *Clone :
Node->Clones)
5417 UpdateCalls(Clone, Visited, UpdateCalls);
5419 for (
auto &
Edge :
Node->CallerEdges)
5420 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5424 if (!
Node->hasCall() ||
Node->emptyContextIds())
5427 if (
Node->IsAllocation) {
5428 auto AT = allocTypeToUse(
Node->AllocTypes);
5434 !ContextIdToContextSizeInfos.empty()) {
5435 uint64_t TotalCold = 0;
5437 for (
auto Id :
Node->getContextIds()) {
5438 auto TypeI = ContextIdToAllocationType.find(Id);
5439 assert(TypeI != ContextIdToAllocationType.end());
5440 auto CSI = ContextIdToContextSizeInfos.find(Id);
5441 if (CSI != ContextIdToContextSizeInfos.end()) {
5442 for (
auto &Info : CSI->second) {
5444 if (TypeI->second == AllocationType::Cold)
5445 TotalCold +=
Info.TotalSize;
5450 AT = AllocationType::Cold;
5452 updateAllocationCall(
Node->Call, AT);
5457 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5460 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5461 updateCall(
Node->Call, CalleeFunc);
5463 for (
auto &
Call :
Node->MatchingCalls)
5464 updateCall(
Call, CalleeFunc);
5468 if (!UnassignedCallClones.
contains(Node))
5470 DenseSet<unsigned> NodeCallClones;
5471 for (
auto *
C :
Node->Clones)
5472 NodeCallClones.
insert(
C->Call.cloneNo());
5474 auto &ClonedCalls = UnassignedCallClones[
Node];
5475 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5479 if (NodeCallClones.
contains(CloneNo))
5482 for (
auto &
Call : CallVector)
5483 updateCall(
Call, CalleeFunc);
5492 DenseSet<const ContextNode *> Visited;
5493 for (
auto &Entry : AllocationCallToContextNodeMap)
5494 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5505 for (
auto &SN : FS->callsites()) {
5510 SN.Clones.size() >
I &&
5511 "Callsite summary has fewer entries than other summaries in function");
5512 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5519 for (
auto &AN : FS->allocs()) {
5523 assert(AN.Versions.size() >
I &&
5524 "Alloc summary has fewer entries than other summaries in function");
5525 if (AN.Versions.size() <=
I ||
5542 NewGV->takeName(DeclGV);
5549 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5550 if (!FuncToAliasMap.count(&
F))
5552 for (
auto *
A : FuncToAliasMap[&
F]) {
5554 auto *PrevA = M.getNamedAlias(AliasName);
5556 A->getType()->getPointerAddressSpace(),
5557 A->getLinkage(), AliasName, NewF);
5558 NewA->copyAttributesFrom(
A);
5560 TakeDeclNameAndReplace(PrevA, NewA);
5569 FunctionsClonedThinBackend++;
5586 for (
unsigned I = 1;
I < NumClones;
I++) {
5587 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5594 FunctionCloneDuplicatesThinBackend++;
5595 auto *Func = HashToFunc[Hash];
5596 if (Func->hasAvailableExternallyLinkage()) {
5602 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5604 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5607 auto *PrevF = M.getFunction(Name);
5610 TakeDeclNameAndReplace(PrevF, Alias);
5612 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5615 CloneFuncAliases(Func,
I);
5619 HashToFunc[Hash] = NewF;
5620 FunctionClonesThinBackend++;
5623 for (
auto &BB : *NewF) {
5624 for (
auto &Inst : BB) {
5625 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5626 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5631 TakeDeclNameAndReplace(PrevF, NewF);
5633 NewF->setName(Name);
5636 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5639 CloneFuncAliases(NewF,
I);
5648 const Function *CallingFunc =
nullptr) {
5667 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5673 if (!SrcFileMD &&
F.isDeclaration()) {
5677 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5682 assert(SrcFileMD || OrigName ==
F.getName());
5684 StringRef SrcFile = M.getSourceFileName();
5696 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5697 F.getName().contains(
'.')) {
5698 OrigName =
F.getName().rsplit(
'.').first;
5707 assert(TheFnVI ||
F.isDeclaration());
5711bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5713 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5714 Symtab = std::make_unique<InstrProfSymtab>();
5725 if (
Error E = Symtab->create(M,
true,
false)) {
5726 std::string SymtabFailure =
toString(std::move(
E));
5727 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5740 auto MIBIter = AllocNode.
MIBs.begin();
5741 for (
auto &MDOp : MemProfMD->
operands()) {
5743 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5748 auto ContextIterBegin =
5752 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5754 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5759 if (LastStackContextId == *ContextIter)
5761 LastStackContextId = *ContextIter;
5762 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5772bool MemProfContextDisambiguation::applyImport(
Module &M) {
5779 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5781 for (
auto &
A :
M.aliases()) {
5782 auto *Aliasee =
A.getAliaseeObject();
5784 FuncToAliasMap[
F].insert(&
A);
5787 if (!initializeIndirectCallPromotionInfo(M))
5794 OptimizationRemarkEmitter ORE(&
F);
5797 bool ClonesCreated =
false;
5798 unsigned NumClonesCreated = 0;
5799 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5809 if (ClonesCreated) {
5810 assert(NumClonesCreated == NumClones);
5817 ClonesCreated =
true;
5818 NumClonesCreated = NumClones;
5821 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5822 Function *CalledFunction, FunctionSummary *
FS) {
5824 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5836 if (CalledFunction != CB->getCalledOperand() &&
5837 (!GA || CalledFunction != GA->getAliaseeObject())) {
5838 SkippedCallsCloning++;
5844 auto CalleeOrigName = CalledFunction->getName();
5845 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5848 if (J > 0 && VMaps[J - 1]->
empty())
5852 if (!StackNode.
Clones[J])
5854 auto NewF =
M.getOrInsertFunction(
5856 CalledFunction->getFunctionType());
5870 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5871 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5873 <<
" assigned to call function clone "
5874 <<
ore::NV(
"Callee", NewF.getCallee()));
5888 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5892 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5894 "enable-import-metadata is needed to emit thinlto_src_module");
5895 StringRef SrcModule =
5898 if (GVS->modulePath() == SrcModule) {
5899 GVSummary = GVS.get();
5924 if (
FS->allocs().empty() &&
FS->callsites().empty())
5927 auto SI =
FS->callsites().begin();
5928 auto AI =
FS->allocs().begin();
5933 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5936 for (
auto CallsiteIt =
FS->callsites().rbegin();
5937 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5938 auto &Callsite = *CallsiteIt;
5942 if (!Callsite.StackIdIndices.empty())
5944 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5953 for (
auto &BB :
F) {
5954 for (
auto &
I : BB) {
5960 auto *CalledValue = CB->getCalledOperand();
5961 auto *CalledFunction = CB->getCalledFunction();
5962 if (CalledValue && !CalledFunction) {
5963 CalledValue = CalledValue->stripPointerCasts();
5970 assert(!CalledFunction &&
5971 "Expected null called function in callsite for alias");
5975 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5976 I.getMetadata(LLVMContext::MD_callsite));
5977 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5983 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5984 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5985 ? AllocTypeColdThinBackend++
5986 : AllocTypeNotColdThinBackend++;
5987 OrigAllocsThinBackend++;
5988 AllocVersionsThinBackend++;
5989 if (!MaxAllocVersionsThinBackend)
5990 MaxAllocVersionsThinBackend = 1;
5997 auto &AllocNode = *(AI++);
6005 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
6007 OrigAllocsThinBackend++;
6008 AllocVersionsThinBackend += AllocNode.Versions.size();
6009 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
6010 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
6020 if (AllocNode.Versions.size() == 1 &&
6023 AllocationType::NotCold ||
6025 AllocationType::None);
6026 UnclonableAllocsThinBackend++;
6032 return Type == ((uint8_t)AllocationType::NotCold |
6033 (uint8_t)AllocationType::Cold);
6037 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
6040 if (J > 0 && VMaps[J - 1]->
empty())
6043 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
6046 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
6047 : AllocTypeNotColdThinBackend++;
6062 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
6063 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
6065 <<
" marked with memprof allocation attribute "
6066 <<
ore::NV(
"Attribute", AllocTypeString));
6068 }
else if (!CallsiteContext.empty()) {
6069 if (!CalledFunction) {
6073 assert(!CI || !CI->isInlineAsm());
6083 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6089 CloneFuncIfNeeded(NumClones, FS);
6094 assert(SI !=
FS->callsites().end());
6095 auto &StackNode = *(
SI++);
6101 for (
auto StackId : CallsiteContext) {
6103 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6109 CloneCallsite(StackNode, CB, CalledFunction, FS);
6111 }
else if (CB->isTailCall() && CalledFunction) {
6114 ValueInfo CalleeVI =
6116 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6117 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6118 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6119 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6126 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6136 for (
auto &BB :
F) {
6137 for (
auto &
I : BB) {
6140 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6141 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6149unsigned MemProfContextDisambiguation::recordICPInfo(
6154 uint32_t NumCandidates;
6155 uint64_t TotalCount;
6156 auto CandidateProfileData =
6157 ICallAnalysis->getPromotionCandidatesForInstruction(
6159 if (CandidateProfileData.empty())
6165 bool ICPNeeded =
false;
6166 unsigned NumClones = 0;
6167 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6168 for (
const auto &Candidate : CandidateProfileData) {
6170 auto CalleeValueInfo =
6172 ImportSummary->getValueInfo(Candidate.Value);
6175 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6177 auto &StackNode = *(
SI++);
6182 [](
unsigned CloneNo) { return CloneNo != 0; });
6192 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6193 TotalCount, CallsiteInfoStartIndex});
6197void MemProfContextDisambiguation::performICP(
6199 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6201 OptimizationRemarkEmitter &ORE) {
6208 for (
auto &Info : ICallAnalysisInfo) {
6211 auto TotalCount =
Info.TotalCount;
6212 unsigned NumClones = 0;
6215 for (
auto &Candidate :
Info.CandidateProfileData) {
6226 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6227 if (TargetFunction ==
nullptr ||
6235 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6236 <<
"Memprof cannot promote indirect call: target with md5sum "
6237 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6242 RemainingCandidates.
push_back(Candidate);
6247 const char *Reason =
nullptr;
6250 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6251 <<
"Memprof cannot promote indirect call to "
6252 <<
ore::NV(
"TargetFunction", TargetFunction)
6253 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6256 RemainingCandidates.
push_back(Candidate);
6265 CallBase *CBClone = CB;
6266 for (
unsigned J = 0; J < NumClones; J++) {
6269 if (J > 0 && VMaps[J - 1]->
empty())
6279 TotalCount, isSamplePGO, &ORE);
6280 auto *TargetToUse = TargetFunction;
6283 if (StackNode.
Clones[J]) {
6302 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6304 <<
" promoted and assigned to call function clone "
6305 <<
ore::NV(
"Callee", TargetToUse));
6309 TotalCount -= Candidate.Count;
6313 CallBase *CBClone = CB;
6314 for (
unsigned J = 0; J < NumClones; J++) {
6317 if (J > 0 && VMaps[J - 1]->
empty())
6323 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6326 if (TotalCount != 0)
6328 IPVK_IndirectCallTarget,
Info.NumCandidates);
6333template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6334bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
6335 function_ref<
void(StringRef, StringRef,
const Twine &)> EmitRemark,
6336 bool AllowExtraAnalysis) {
6338 dbgs() <<
"CCG before cloning:\n";
6342 exportToDot(
"postbuild");
6355 dbgs() <<
"CCG after cloning:\n";
6359 exportToDot(
"cloned");
6361 bool Changed = assignFunctions();
6364 dbgs() <<
"CCG after assigning function clones:\n";
6368 exportToDot(
"clonefuncassign");
6371 printTotalSizes(
errs(), EmitRemark);
6376bool MemProfContextDisambiguation::processModule(
6378 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6383 return applyImport(M);
6396 ModuleCallsiteContextGraph CCG(M, OREGetter);
6399 return CCG.process();
6404 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6409 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6413 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6417 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6418 "-memprof-dot-context-id");
6419 if (ImportSummary) {
6429 auto ReadSummaryFile =
6431 if (!ReadSummaryFile) {
6438 if (!ImportSummaryForTestingOrErr) {
6444 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6445 ImportSummary = ImportSummaryForTesting.get();
6454 if (!processModule(M, OREGetter))
6473 bool AllowExtraAnalysis =
6476 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6477 CCG.process(EmitRemark, AllowExtraAnalysis);
6492 for (
auto &BB :
F) {
6493 for (
auto &
I : BB) {
6497 if (CI->hasFnAttr(
"memprof")) {
6498 CI->removeFnAttr(
"memprof");
6501 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6502 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6508 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6509 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(0), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
bool isWeakForLinker() const
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
LLVM_ABI MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bits of the poi...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...