52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
118 cl::desc(
"Specify the path prefix of the MemProf dot files."));
122 cl::desc(
"Export graph to dot files."));
127 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
137 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
142 "Export only nodes with contexts feeding given "
143 "-memprof-dot-alloc-id"),
145 "Export only nodes with given -memprof-dot-context-id")));
149 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
150 "or to highlight if -memprof-dot-scope=all"));
154 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
155 "highlight otherwise"));
159 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
163 cl::desc(
"Perform verification checks on CallingContextGraph."));
167 cl::desc(
"Perform frequent verification checks on nodes."));
170 "memprof-import-summary",
171 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
177 cl::desc(
"Max depth to recursively search for missing "
178 "frames through tail calls."));
183 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
187 cl::desc(
"Allow cloning of contexts through recursive cycles"));
194 cl::desc(
"Merge clones before assigning functions"));
203 cl::desc(
"Allow cloning of contexts having recursive cycles"));
209 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
220 cl::desc(
"Linking with hot/cold operator new interfaces"));
225 "Require target function definition when promoting indirect calls"));
232 cl::desc(
"Number of largest cold contexts to consider important"));
236 cl::desc(
"Enables edge fixup for important contexts"));
258template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
259class CallsiteContextGraph {
261 CallsiteContextGraph() =
default;
262 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
263 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
270 void identifyClones();
277 bool assignFunctions();
284 const CallsiteContextGraph &CCG) {
290 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
292 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
294 void exportToDot(std::string Label)
const;
297 struct FuncInfo final
298 :
public std::pair<FuncTy *, unsigned > {
299 using Base = std::pair<FuncTy *, unsigned>;
301 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
302 explicit operator bool()
const {
return this->first !=
nullptr; }
303 FuncTy *func()
const {
return this->first; }
304 unsigned cloneNo()
const {
return this->second; }
308 struct CallInfo final :
public std::pair<CallTy, unsigned > {
309 using Base = std::pair<CallTy, unsigned>;
311 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
313 explicit operator bool()
const {
return (
bool)this->first; }
314 CallTy call()
const {
return this->first; }
315 unsigned cloneNo()
const {
return this->second; }
316 void setCloneNo(
unsigned N) { this->second =
N; }
318 if (!
operator bool()) {
324 OS <<
"\t(clone " << cloneNo() <<
")";
350 bool Recursive =
false;
377 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
381 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
385 bool useCallerEdgesForContextInfo()
const {
390 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
408 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
409 Count += Edge->getContextIds().size();
413 CalleeEdges, useCallerEdgesForContextInfo()
415 : std::vector<std::shared_ptr<ContextEdge>>());
416 for (
const auto &Edge : Edges)
423 uint8_t computeAllocType()
const {
428 CalleeEdges, useCallerEdgesForContextInfo()
430 : std::vector<std::shared_ptr<ContextEdge>>());
431 for (
const auto &Edge : Edges) {
442 bool emptyContextIds()
const {
444 CalleeEdges, useCallerEdgesForContextInfo()
446 : std::vector<std::shared_ptr<ContextEdge>>());
447 for (
const auto &Edge : Edges) {
448 if (!Edge->getContextIds().empty())
455 std::vector<ContextNode *> Clones;
458 ContextNode *CloneOf =
nullptr;
460 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
462 ContextNode(
bool IsAllocation, CallInfo
C)
463 : IsAllocation(IsAllocation),
Call(
C) {}
465 void addClone(ContextNode *Clone) {
467 CloneOf->Clones.push_back(Clone);
468 Clone->CloneOf = CloneOf;
470 Clones.push_back(Clone);
472 Clone->CloneOf =
this;
476 ContextNode *getOrigNode() {
483 unsigned int ContextId);
485 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
486 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
487 void eraseCalleeEdge(
const ContextEdge *Edge);
488 void eraseCallerEdge(
const ContextEdge *Edge);
490 void setCall(CallInfo
C) {
Call =
C; }
492 bool hasCall()
const {
return (
bool)
Call.call(); }
498 bool isRemoved()
const {
534 bool IsBackedge =
false;
541 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
542 ContextIds(std::move(ContextIds)) {}
548 inline void clear() {
558 inline bool isRemoved()
const {
559 if (Callee || Caller)
580 void removeNoneTypeCalleeEdges(ContextNode *
Node);
581 void removeNoneTypeCallerEdges(ContextNode *
Node);
583 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
589 template <
class NodeT,
class IteratorT>
590 std::vector<uint64_t>
595 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
598 template <
class NodeT,
class IteratorT>
599 void addStackNodesForMIB(
603 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
608 void updateStackNodes();
617 void fixupImportantContexts();
621 void handleCallsitesWithMultipleTargets();
624 void markBackedges();
634 bool partitionCallsByCallee(
636 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
643 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
650 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
655 struct CallContextInfo {
659 std::vector<uint64_t> StackIds;
673 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
674 bool CalleeIter =
true);
682 void assignStackNodesPostOrder(
696 void propagateDuplicateContextIds(
702 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
710 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
720 calleesMatch(CallTy
Call, EdgeIter &EI,
725 const FuncTy *getCalleeFunc(CallTy
Call) {
726 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
732 bool calleeMatchesFunc(
733 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
734 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
735 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
736 Call, Func, CallerFunc, FoundCalleeChain);
740 bool sameCallee(CallTy Call1, CallTy Call2) {
741 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
746 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
747 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
753 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
759 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
764 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
769 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
770 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
776 FuncInfo cloneFunctionForCallsite(
778 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
779 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
780 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
785 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
786 unsigned CloneNo)
const {
787 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
791 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
792 CallInfo
C = CallInfo()) {
793 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
794 auto *NewNode = NodeOwner.back().get();
796 NodeToCallingFunc[NewNode] =
F;
797 NewNode->NodeId = NodeOwner.size();
802 ContextNode *getNodeForInst(
const CallInfo &
C);
803 ContextNode *getNodeForAlloc(
const CallInfo &
C);
804 ContextNode *getNodeForStackId(
uint64_t StackId);
826 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
833 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
834 ContextNode *NewCallee,
835 bool NewClone =
false,
843 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
844 ContextNode *NewCaller);
855 void mergeNodeCalleeClones(
860 void findOtherCallersToShareMerge(
861 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
889 struct ImportantContextInfo {
891 std::vector<uint64_t> StackIds;
894 unsigned MaxLength = 0;
898 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
907 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
921 auto Size = StackIds.size();
922 for (
auto Id : Ids) {
923 auto &Entry = ImportantContextIdInfo[Id];
924 Entry.StackIdsToNode[StackIds] =
Node;
926 if (
Size > Entry.MaxLength)
927 Entry.MaxLength =
Size;
936 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
942 unsigned int LastContextId = 0;
945template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
947 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
948template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
950 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
951template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
954template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
956 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
959class ModuleCallsiteContextGraph
960 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
963 ModuleCallsiteContextGraph(
965 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
968 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
971 uint64_t getStackId(uint64_t IdOrIndex)
const;
973 bool calleeMatchesFunc(
974 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
975 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
976 bool sameCallee(Instruction *Call1, Instruction *Call2);
977 bool findProfiledCalleeThroughTailCalls(
978 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
979 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
980 bool &FoundMultipleCalleeChains);
981 uint64_t getLastStackId(Instruction *
Call);
982 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
985 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
986 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
988 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
989 DenseMap<CallInfo, CallInfo> &CallMap,
990 std::vector<CallInfo> &CallsWithMetadataInFunc,
992 std::string getLabel(
const Function *Func,
const Instruction *
Call,
993 unsigned CloneNo)
const;
996 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1002struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1003 IndexCall() : PointerUnion() {}
1004 IndexCall(std::nullptr_t) : IndexCall() {}
1005 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1006 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1007 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1009 IndexCall *operator->() {
return this; }
1011 void print(raw_ostream &OS)
const {
1012 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1037class IndexCallsiteContextGraph
1038 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1041 IndexCallsiteContextGraph(
1042 ModuleSummaryIndex &Index,
1046 ~IndexCallsiteContextGraph() {
1051 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1053 for (
auto &Callsite :
I.second)
1054 FS->addCallsite(*Callsite.second);
1059 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1062 uint64_t getStackId(uint64_t IdOrIndex)
const;
1063 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1064 bool calleeMatchesFunc(
1065 IndexCall &
Call,
const FunctionSummary *Func,
1066 const FunctionSummary *CallerFunc,
1067 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1068 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1069 bool findProfiledCalleeThroughTailCalls(
1070 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1071 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1072 bool &FoundMultipleCalleeChains);
1073 uint64_t getLastStackId(IndexCall &
Call);
1074 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1077 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1078 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1079 IndexCall>::FuncInfo
1080 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1081 DenseMap<CallInfo, CallInfo> &CallMap,
1082 std::vector<CallInfo> &CallsWithMetadataInFunc,
1084 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1085 unsigned CloneNo)
const;
1089 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1091 const ModuleSummaryIndex &Index;
1099 std::unordered_map<FunctionSummary *,
1100 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1101 FunctionCalleesToSynthesizedCallsiteInfos;
1112 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1115 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1136template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1137bool allocTypesMatch(
1138 const std::vector<uint8_t> &InAllocTypes,
1139 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1143 assert(InAllocTypes.size() == Edges.size());
1145 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1147 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1151 if (l == (uint8_t)AllocationType::None ||
1152 r->AllocTypes == (uint8_t)AllocationType::None)
1154 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1163template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1164bool allocTypesMatchClone(
1165 const std::vector<uint8_t> &InAllocTypes,
1166 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1167 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1171 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1175 for (
const auto &
E : Clone->CalleeEdges) {
1177 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1181 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1182 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1184 if (Iter == EdgeCalleeMap.
end())
1192 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1200template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1201typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1202CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1203 const CallInfo &
C) {
1204 ContextNode *
Node = getNodeForAlloc(
C);
1208 return NonAllocationCallToContextNodeMap.lookup(
C);
1211template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1212typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1213CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1214 const CallInfo &
C) {
1215 return AllocationCallToContextNodeMap.lookup(
C);
1218template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1219typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1220CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1222 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1223 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1224 return StackEntryNode->second;
1228template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1229void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1231 unsigned int ContextId) {
1232 for (
auto &
Edge : CallerEdges) {
1233 if (
Edge->Caller == Caller) {
1235 Edge->getContextIds().insert(ContextId);
1239 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1240 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1241 CallerEdges.push_back(
Edge);
1245template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1246void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1247 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1263 auto CalleeCallerCount =
Callee->CallerEdges.size();
1264 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1269 }
else if (CalleeIter) {
1271 *EI =
Caller->CalleeEdges.erase(*EI);
1274 *EI =
Callee->CallerEdges.erase(*EI);
1276 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1277 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1280template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1281void CallsiteContextGraph<
1282 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1283 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1285 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1287 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1293template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1294void CallsiteContextGraph<
1295 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1296 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1298 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1300 Edge->Caller->eraseCalleeEdge(
Edge.get());
1301 EI =
Node->CallerEdges.erase(EI);
1307template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1308typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1309CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1310 findEdgeFromCallee(
const ContextNode *Callee) {
1311 for (
const auto &
Edge : CalleeEdges)
1312 if (
Edge->Callee == Callee)
1317template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1318typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1319CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1320 findEdgeFromCaller(
const ContextNode *Caller) {
1321 for (
const auto &
Edge : CallerEdges)
1322 if (
Edge->Caller == Caller)
1327template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1328void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1329 eraseCalleeEdge(
const ContextEdge *
Edge) {
1331 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1332 return CalleeEdge.get() ==
Edge;
1334 assert(EI != CalleeEdges.end());
1335 CalleeEdges.erase(EI);
1338template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1339void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1340 eraseCallerEdge(
const ContextEdge *
Edge) {
1342 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1343 return CallerEdge.get() ==
Edge;
1345 assert(EI != CallerEdges.end());
1346 CallerEdges.erase(EI);
1349template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1350uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1351 DenseSet<uint32_t> &ContextIds)
const {
1353 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1354 uint8_t
AllocType = (uint8_t)AllocationType::None;
1355 for (
auto Id : ContextIds) {
1356 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1364template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1366CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1367 const DenseSet<uint32_t> &Node1Ids,
1368 const DenseSet<uint32_t> &Node2Ids)
const {
1370 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1371 uint8_t
AllocType = (uint8_t)AllocationType::None;
1372 for (
auto Id : Node1Ids) {
1373 if (!Node2Ids.
count(Id))
1375 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1383template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1384uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1385 const DenseSet<uint32_t> &Node1Ids,
1386 const DenseSet<uint32_t> &Node2Ids)
const {
1387 if (Node1Ids.
size() < Node2Ids.
size())
1388 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1390 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1393template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1394typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1395CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1396 CallInfo
Call,
const FuncTy *
F) {
1398 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1399 AllocationCallToContextNodeMap[
Call] = AllocNode;
1401 AllocNode->OrigStackOrAllocId = LastContextId;
1404 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1420template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1421template <
class NodeT,
class IteratorT>
1422void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1423 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1426 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1432 ContextIdToAllocationType[++LastContextId] =
AllocType;
1434 bool IsImportant =
false;
1435 if (!ContextSizeInfo.
empty()) {
1436 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1440 uint64_t TotalCold = 0;
1441 for (
auto &CSI : ContextSizeInfo)
1442 TotalCold += CSI.TotalSize;
1448 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1451 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1452 TotalSizeToContextIdTopNCold.erase(
1453 TotalSizeToContextIdTopNCold.begin());
1454 assert(ImportantContextIdInfo.count(IdToRemove));
1455 ImportantContextIdInfo.erase(IdToRemove);
1457 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1461 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1465 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1470 ContextNode *PrevNode = AllocNode;
1474 SmallSet<uint64_t, 8> StackIdSet;
1477 ContextIter != StackContext.
end(); ++ContextIter) {
1478 auto StackId = getStackId(*ContextIter);
1480 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1481 ContextNode *StackNode = getNodeForStackId(StackId);
1483 StackNode = createNewNode(
false);
1484 StackEntryIdToContextNodeMap[StackId] = StackNode;
1485 StackNode->OrigStackOrAllocId = StackId;
1490 auto Ins = StackIdSet.
insert(StackId);
1492 StackNode->Recursive =
true;
1494 StackNode->AllocTypes |= (uint8_t)
AllocType;
1495 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1496 PrevNode = StackNode;
1500template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1502CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1503 const DenseSet<uint32_t> &StackSequenceContextIds,
1504 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1505 DenseSet<uint32_t> NewContextIds;
1506 for (
auto OldId : StackSequenceContextIds) {
1507 NewContextIds.
insert(++LastContextId);
1508 OldToNewContextIds[OldId].insert(LastContextId);
1509 assert(ContextIdToAllocationType.count(OldId));
1511 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1512 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1513 if (CSI != ContextIdToContextSizeInfos.end())
1514 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1515 if (DotAllocContextIds.
contains(OldId))
1516 DotAllocContextIds.
insert(LastContextId);
1518 return NewContextIds;
1521template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1522void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1523 propagateDuplicateContextIds(
1524 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1526 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1527 DenseSet<uint32_t> NewIds;
1528 for (
auto Id : ContextIds)
1529 if (
auto NewId = OldToNewContextIds.find(Id);
1530 NewId != OldToNewContextIds.end())
1536 auto UpdateCallers = [&](ContextNode *
Node,
1537 DenseSet<const ContextEdge *> &Visited,
1538 auto &&UpdateCallers) ->
void {
1539 for (
const auto &
Edge :
Node->CallerEdges) {
1543 ContextNode *NextNode =
Edge->Caller;
1544 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1547 if (!NewIdsToAdd.
empty()) {
1548 Edge->getContextIds().insert_range(NewIdsToAdd);
1549 UpdateCallers(NextNode, Visited, UpdateCallers);
1554 DenseSet<const ContextEdge *> Visited;
1555 for (
auto &Entry : AllocationCallToContextNodeMap) {
1557 UpdateCallers(Node, Visited, UpdateCallers);
1561template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1562void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1563 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1566 DenseSet<uint32_t> RemainingContextIds) {
1568 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1569 DenseSet<uint32_t> RecursiveContextIds;
1570 DenseSet<uint32_t> AllCallerContextIds;
1575 for (
auto &CE : OrigEdges) {
1576 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1577 for (
auto Id :
CE->getContextIds())
1578 if (!AllCallerContextIds.
insert(Id).second)
1579 RecursiveContextIds.
insert(Id);
1583 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1585 DenseSet<uint32_t> NewEdgeContextIds;
1586 DenseSet<uint32_t> NotFoundContextIds;
1590 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1591 NotFoundContextIds);
1594 if (RecursiveContextIds.
empty()) {
1597 RemainingContextIds.
swap(NotFoundContextIds);
1607 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1609 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1612 if (NewEdgeContextIds.
empty()) {
1616 if (TowardsCallee) {
1617 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1618 auto NewEdge = std::make_shared<ContextEdge>(
1619 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1620 NewNode->CalleeEdges.push_back(NewEdge);
1621 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1623 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1624 auto NewEdge = std::make_shared<ContextEdge>(
1625 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1626 NewNode->CallerEdges.push_back(NewEdge);
1627 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1630 if (
Edge->getContextIds().empty()) {
1631 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1638template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1640 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1644 assert(!Edge->ContextIds.empty());
1647template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1649 bool CheckEdges =
true) {
1650 if (
Node->isRemoved())
1654 auto NodeContextIds =
Node->getContextIds();
1658 if (
Node->CallerEdges.size()) {
1660 Node->CallerEdges.front()->ContextIds);
1664 set_union(CallerEdgeContextIds, Edge->ContextIds);
1671 NodeContextIds == CallerEdgeContextIds ||
1674 if (
Node->CalleeEdges.size()) {
1676 Node->CalleeEdges.front()->ContextIds);
1680 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1686 NodeContextIds == CalleeEdgeContextIds);
1695 for (
const auto &
E :
Node->CalleeEdges)
1701template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1702void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1703 assignStackNodesPostOrder(ContextNode *Node,
1704 DenseSet<const ContextNode *> &Visited,
1705 DenseMap<uint64_t, std::vector<CallContextInfo>>
1706 &StackIdToMatchingCalls,
1707 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1708 const DenseSet<uint32_t> &ImportantContextIds) {
1716 auto CallerEdges =
Node->CallerEdges;
1717 for (
auto &
Edge : CallerEdges) {
1719 if (
Edge->isRemoved()) {
1723 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1724 CallToMatchingCall, ImportantContextIds);
1733 if (
Node->IsAllocation ||
1734 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1737 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1741 if (Calls.size() == 1) {
1742 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1743 if (Ids.size() == 1) {
1744 assert(SavedContextIds.empty());
1746 assert(Node == getNodeForStackId(Ids[0]));
1747 if (
Node->Recursive)
1750 NonAllocationCallToContextNodeMap[
Call] =
Node;
1752 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1760 uint64_t LastId =
Node->OrigStackOrAllocId;
1761 ContextNode *LastNode = getNodeForStackId(LastId);
1764 assert(LastNode == Node);
1766 ContextNode *LastNode =
Node;
1771 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1773 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1774 bool CreatedNode =
false;
1775 for (
unsigned I = 0;
I < Calls.size();
1776 I++, PrevIterCreatedNode = CreatedNode) {
1777 CreatedNode =
false;
1778 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1781 if (SavedContextIds.empty()) {
1788 auto MatchingCall = CallToMatchingCall[
Call];
1789 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1793 assert(
I > 0 && !PrevIterCreatedNode);
1796 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1801 assert(LastId == Ids.back());
1810 ContextNode *PrevNode = LastNode;
1814 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1816 ContextNode *CurNode = getNodeForStackId(Id);
1820 assert(!CurNode->Recursive);
1822 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1834 if (SavedContextIds.empty()) {
1843 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1844 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1846 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1848 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1854 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1859 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1864 for (
auto Id : Ids) {
1865 ContextNode *CurNode = getNodeForStackId(Id);
1872 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1879 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1880 if (PrevEdge->getContextIds().empty())
1881 removeEdgeFromGraph(PrevEdge);
1886 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1887 ? (uint8_t)AllocationType::None
1888 : CurNode->computeAllocType();
1892 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1896 for (
auto Id : Ids) {
1897 ContextNode *CurNode = getNodeForStackId(Id);
1906template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1907void CallsiteContextGraph<DerivedCCG, FuncTy,
1908 CallTy>::fixupImportantContexts() {
1909 if (ImportantContextIdInfo.empty())
1913 NumImportantContextIds = ImportantContextIdInfo.size();
1919 exportToDot(
"beforestackfixup");
1944 for (
auto &[CurContextId,
Info] : ImportantContextIdInfo) {
1945 if (
Info.StackIdsToNode.empty())
1948 ContextNode *PrevNode =
nullptr;
1949 ContextNode *CurNode =
nullptr;
1950 DenseSet<const ContextEdge *> VisitedEdges;
1951 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1954 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1958 auto LenToEnd = AllStackIds.size() -
I;
1966 auto CheckStackIds = AllStackIds.slice(
I, Len);
1967 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1968 if (EntryIt ==
Info.StackIdsToNode.end())
1970 CurNode = EntryIt->second;
1987 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1990 if (CurEdge->getContextIds().insert(CurContextId).second) {
1991 NumFixupEdgeIdsInserted++;
1996 NumFixupEdgesAdded++;
1997 DenseSet<uint32_t> ContextIds({CurContextId});
1998 auto AllocType = computeAllocType(ContextIds);
1999 auto NewEdge = std::make_shared<ContextEdge>(
2000 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2001 PrevNode->CallerEdges.push_back(NewEdge);
2002 CurNode->CalleeEdges.push_back(NewEdge);
2004 CurEdge = NewEdge.get();
2007 VisitedEdges.
insert(CurEdge);
2010 for (
auto &
Edge : PrevNode->CallerEdges) {
2014 Edge->getContextIds().erase(CurContextId);
2022template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2023void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2031 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2032 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2033 for (
auto &
Call : CallsWithMetadata) {
2035 if (AllocationCallToContextNodeMap.count(
Call))
2037 auto StackIdsWithContextNodes =
2038 getStackIdsWithContextNodesForCall(
Call.call());
2041 if (StackIdsWithContextNodes.empty())
2045 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2046 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2056 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2060 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2061 for (
auto &It : StackIdToMatchingCalls) {
2062 auto &Calls = It.getSecond();
2064 if (Calls.size() == 1) {
2065 auto &Ids = Calls[0].StackIds;
2066 if (Ids.size() == 1)
2079 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2080 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2081 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2084 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2085 return A.StackIds.size() >
B.StackIds.size() ||
2086 (
A.StackIds.size() ==
B.StackIds.size() &&
2087 (
A.StackIds <
B.StackIds ||
2088 (
A.StackIds ==
B.StackIds &&
2089 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2095 uint64_t LastId = It.getFirst();
2096 ContextNode *LastNode = getNodeForStackId(LastId);
2100 if (LastNode->Recursive)
2105 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2113 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2116 for (
unsigned I = 0;
I < Calls.size();
I++) {
2117 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2118 assert(SavedContextIds.empty());
2119 assert(LastId == Ids.back());
2124 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2125 MatchingIdsFuncSet.
clear();
2132 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2134 ContextNode *PrevNode = LastNode;
2135 ContextNode *CurNode = LastNode;
2140 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2142 CurNode = getNodeForStackId(Id);
2146 if (CurNode->Recursive) {
2151 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2172 if (StackSequenceContextIds.
empty()) {
2185 if (Ids.back() != getLastStackId(
Call)) {
2186 for (
const auto &PE : LastNode->CallerEdges) {
2187 set_subtract(StackSequenceContextIds, PE->getContextIds());
2188 if (StackSequenceContextIds.
empty())
2192 if (StackSequenceContextIds.
empty())
2204 MatchingIdsFuncSet.
insert(Func);
2211 bool DuplicateContextIds =
false;
2212 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2213 auto &CallCtxInfo = Calls[J];
2214 auto &NextIds = CallCtxInfo.StackIds;
2217 auto *NextFunc = CallCtxInfo.Func;
2218 if (NextFunc != Func) {
2221 DuplicateContextIds =
true;
2224 auto &NextCall = CallCtxInfo.Call;
2225 CallToMatchingCall[NextCall] =
Call;
2236 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2237 StackSequenceContextIds.
size());
2240 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2241 : StackSequenceContextIds;
2242 assert(!SavedContextIds.empty());
2244 if (!DuplicateContextIds) {
2248 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2249 if (LastNodeContextIds.
empty())
2256 propagateDuplicateContextIds(OldToNewContextIds);
2266 DenseSet<const ContextNode *> Visited;
2268 ImportantContextIdInfo.keys());
2269 for (
auto &Entry : AllocationCallToContextNodeMap)
2270 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2271 CallToMatchingCall, ImportantContextIds);
2273 fixupImportantContexts();
2279uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2280 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2282 return CallsiteContext.
back();
2285uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2287 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2290 return Index.getStackIdAtIndex(CallsiteContext.
back());
2312 auto Pos =
F.getName().find_last_of(
'.');
2315 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2321std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2322 const Instruction *
Call,
2323 unsigned CloneNo)
const {
2329std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2330 const IndexCall &
Call,
2331 unsigned CloneNo)
const {
2332 auto VI = FSToVIMap.find(Func);
2333 assert(VI != FSToVIMap.end());
2336 return CallerName +
" -> alloc";
2339 return CallerName +
" -> " +
2341 Callsite->Clones[CloneNo]);
2345std::vector<uint64_t>
2346ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2347 Instruction *
Call) {
2348 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2350 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2354std::vector<uint64_t>
2355IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2357 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2359 return getStackIdsWithContextNodes<CallsiteInfo,
2360 SmallVector<unsigned>::const_iterator>(
2364template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2365template <
class NodeT,
class IteratorT>
2366std::vector<uint64_t>
2367CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2368 CallStack<NodeT, IteratorT> &CallsiteContext) {
2369 std::vector<uint64_t> StackIds;
2370 for (
auto IdOrIndex : CallsiteContext) {
2371 auto StackId = getStackId(IdOrIndex);
2372 ContextNode *
Node = getNodeForStackId(StackId);
2375 StackIds.push_back(StackId);
2380ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2382 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2383 :
Mod(
M), OREGetter(OREGetter) {
2387 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2389 std::vector<CallInfo> CallsWithMetadata;
2390 for (
auto &BB :
F) {
2391 for (
auto &
I : BB) {
2394 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2395 CallsWithMetadata.push_back(&
I);
2396 auto *AllocNode = addAllocNode(&
I, &
F);
2397 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2401 for (
auto &MDOp : MemProfMD->operands()) {
2403 std::vector<ContextTotalSize> ContextSizeInfo;
2405 if (MIBMD->getNumOperands() > 2) {
2406 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2407 MDNode *ContextSizePair =
2416 ContextSizeInfo.push_back({FullStackId, TotalSize});
2422 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2423 AllocNode, StackContext, CallsiteContext,
2425 TotalSizeToContextIdTopNCold);
2430 DotAllocContextIds = AllocNode->getContextIds();
2434 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2435 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2438 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2439 CallsWithMetadata.push_back(&
I);
2443 if (!CallsWithMetadata.empty())
2444 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2448 dbgs() <<
"CCG before updating call stack chains:\n";
2453 exportToDot(
"prestackupdate");
2458 exportToDot(
"poststackupdate");
2460 handleCallsitesWithMultipleTargets();
2465 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2466 for (
auto &
Call : FuncEntry.second)
2467 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2470IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2474 : Index(Index), isPrevailing(isPrevailing) {
2478 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2479 for (
auto &
I : Index) {
2480 auto VI = Index.getValueInfo(
I);
2481 for (
auto &S : VI.getSummaryList()) {
2490 !isPrevailing(VI.getGUID(), S.get()))
2495 std::vector<CallInfo> CallsWithMetadata;
2496 if (!
FS->allocs().empty()) {
2497 for (
auto &AN :
FS->mutableAllocs()) {
2502 if (AN.MIBs.empty())
2504 IndexCall AllocCall(&AN);
2505 CallsWithMetadata.push_back(AllocCall);
2506 auto *AllocNode = addAllocNode(AllocCall, FS);
2514 AN.ContextSizeInfos.size() == AN.MIBs.size());
2516 for (
auto &MIB : AN.MIBs) {
2519 std::vector<ContextTotalSize> ContextSizeInfo;
2520 if (!AN.ContextSizeInfos.empty()) {
2521 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2522 ContextSizeInfo.push_back({FullStackId, TotalSize});
2524 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2525 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2526 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2532 DotAllocContextIds = AllocNode->getContextIds();
2538 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2542 if (!
FS->callsites().empty())
2543 for (
auto &SN :
FS->mutableCallsites()) {
2544 IndexCall StackNodeCall(&SN);
2545 CallsWithMetadata.push_back(StackNodeCall);
2548 if (!CallsWithMetadata.empty())
2549 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2551 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2557 dbgs() <<
"CCG before updating call stack chains:\n";
2562 exportToDot(
"prestackupdate");
2567 exportToDot(
"poststackupdate");
2569 handleCallsitesWithMultipleTargets();
2574template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2575void CallsiteContextGraph<DerivedCCG, FuncTy,
2576 CallTy>::handleCallsitesWithMultipleTargets() {
2591 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2592 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2593 auto *
Node = Entry.second;
2602 std::vector<CallInfo> AllCalls;
2603 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2604 AllCalls.push_back(
Node->Call);
2618 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2621 auto It = AllCalls.begin();
2623 for (; It != AllCalls.end(); ++It) {
2626 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2629 if (!Edge->Callee->hasCall())
2631 assert(NodeToCallingFunc.count(Edge->Callee));
2633 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2642 if (
Node->Call != ThisCall) {
2643 Node->setCall(ThisCall);
2654 Node->MatchingCalls.clear();
2657 if (It == AllCalls.end()) {
2658 RemovedEdgesWithMismatchedCallees++;
2662 Node->setCall(CallInfo());
2667 for (++It; It != AllCalls.end(); ++It) {
2671 Node->MatchingCalls.push_back(ThisCall);
2680 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2681 return !it.second->hasCall() || it.second->Call != it.first;
2685 for (
auto &[
Call,
Node] : NewCallToNode)
2686 NonAllocationCallToContextNodeMap[
Call] =
Node;
2690 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2691 NonAllocationCallToContextNodeMap[
Call] =
Node;
2694template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2695bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2697 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2701 struct CallsWithSameCallee {
2702 std::vector<CallInfo> Calls;
2703 ContextNode *
Node =
nullptr;
2709 for (
auto ThisCall : AllCalls) {
2710 auto *
F = getCalleeFunc(
ThisCall.call());
2712 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2721 for (
const auto &Edge :
Node->CalleeEdges) {
2722 if (!Edge->Callee->hasCall())
2724 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2725 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2726 CalleeNodeToCallInfo[Edge->Callee] =
2727 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2733 if (CalleeNodeToCallInfo.
empty())
2745 ContextNode *UnmatchedCalleesNode =
nullptr;
2747 bool UsedOrigNode =
false;
2752 auto CalleeEdges =
Node->CalleeEdges;
2753 for (
auto &Edge : CalleeEdges) {
2754 if (!Edge->Callee->hasCall())
2759 ContextNode *CallerNodeToUse =
nullptr;
2763 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2764 if (!UnmatchedCalleesNode)
2765 UnmatchedCalleesNode =
2766 createNewNode(
false, NodeToCallingFunc[
Node]);
2767 CallerNodeToUse = UnmatchedCalleesNode;
2771 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2774 if (!UsedOrigNode) {
2777 Node->MatchingCalls.clear();
2778 UsedOrigNode =
true;
2781 createNewNode(
false, NodeToCallingFunc[
Node]);
2785 Info->Node->setCall(
Info->Calls.front());
2791 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2793 CallerNodeToUse =
Info->Node;
2797 if (CallerNodeToUse ==
Node)
2800 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2807 for (
auto &
I : CalleeNodeToCallInfo)
2808 removeNoneTypeCallerEdges(
I.second->Node);
2809 if (UnmatchedCalleesNode)
2810 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2811 removeNoneTypeCallerEdges(
Node);
2821uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2824 return Index.getStackIdAtIndex(IdOrIndex);
2827template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2828bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2829 CallTy
Call, EdgeIter &EI,
2830 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2832 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2833 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2836 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2837 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2842 if (FoundCalleeChain.empty())
2846 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2850 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2851 CurEdge->AllocTypes |=
Edge->AllocTypes;
2856 auto NewEdge = std::make_shared<ContextEdge>(
2857 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2858 Callee->CallerEdges.push_back(NewEdge);
2859 if (Caller ==
Edge->Caller) {
2863 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2866 "Iterator position not restored after insert and increment");
2868 Caller->CalleeEdges.push_back(NewEdge);
2873 auto *CurCalleeNode =
Edge->Callee;
2874 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2875 ContextNode *NewNode =
nullptr;
2877 if (TailCallToContextNodeMap.
count(NewCall)) {
2878 NewNode = TailCallToContextNodeMap[NewCall];
2879 NewNode->AllocTypes |=
Edge->AllocTypes;
2881 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2883 NewNode = createNewNode(
false, Func, NewCall);
2884 TailCallToContextNodeMap[NewCall] = NewNode;
2885 NewNode->AllocTypes =
Edge->AllocTypes;
2889 AddEdge(NewNode, CurCalleeNode);
2891 CurCalleeNode = NewNode;
2895 AddEdge(
Edge->Caller, CurCalleeNode);
2903 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2915bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2916 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2917 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2918 bool &FoundMultipleCalleeChains) {
2925 FoundCalleeChain.push_back({Callsite,
F});
2940 bool FoundSingleCalleeChain =
false;
2941 for (
auto &BB : *CalleeFunc) {
2942 for (
auto &
I : BB) {
2944 if (!CB || !CB->isTailCall())
2946 auto *CalledValue = CB->getCalledOperand();
2947 auto *CalledFunction = CB->getCalledFunction();
2948 if (CalledValue && !CalledFunction) {
2949 CalledValue = CalledValue->stripPointerCasts();
2956 assert(!CalledFunction &&
2957 "Expected null called function in callsite for alias");
2960 if (!CalledFunction)
2962 if (CalledFunction == ProfiledCallee) {
2963 if (FoundSingleCalleeChain) {
2964 FoundMultipleCalleeChains =
true;
2967 FoundSingleCalleeChain =
true;
2968 FoundProfiledCalleeCount++;
2969 FoundProfiledCalleeDepth +=
Depth;
2970 if (
Depth > FoundProfiledCalleeMaxDepth)
2971 FoundProfiledCalleeMaxDepth =
Depth;
2972 SaveCallsiteInfo(&
I, CalleeFunc);
2973 }
else if (findProfiledCalleeThroughTailCalls(
2974 ProfiledCallee, CalledFunction,
Depth + 1,
2975 FoundCalleeChain, FoundMultipleCalleeChains)) {
2978 assert(!FoundMultipleCalleeChains);
2979 if (FoundSingleCalleeChain) {
2980 FoundMultipleCalleeChains =
true;
2983 FoundSingleCalleeChain =
true;
2984 SaveCallsiteInfo(&
I, CalleeFunc);
2985 }
else if (FoundMultipleCalleeChains)
2990 return FoundSingleCalleeChain;
2993const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2995 if (!CB->getCalledOperand() || CB->isIndirectCall())
2997 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3004bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3005 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3006 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3008 if (!CB->getCalledOperand() || CB->isIndirectCall())
3010 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3012 if (CalleeFunc == Func)
3015 if (Alias && Alias->getAliasee() == Func)
3026 bool FoundMultipleCalleeChains =
false;
3027 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3029 FoundMultipleCalleeChains)) {
3030 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3031 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3032 <<
" that actually called " << CalleeVal->getName()
3033 << (FoundMultipleCalleeChains
3034 ?
" (found multiple possible chains)"
3037 if (FoundMultipleCalleeChains)
3038 FoundProfiledCalleeNonUniquelyCount++;
3045bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3046 Instruction *Call2) {
3048 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3050 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3053 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3055 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3057 return CalleeFunc1 == CalleeFunc2;
3060bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3061 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3062 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3063 bool &FoundMultipleCalleeChains) {
3069 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3072 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3073 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3076 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3077 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3078 CallsiteInfo *NewCallsiteInfo =
3079 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3080 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3087 bool FoundSingleCalleeChain =
false;
3090 !isPrevailing(CurCallee.
getGUID(), S.get()))
3095 auto FSVI = CurCallee;
3098 FSVI = AS->getAliaseeVI();
3099 for (
auto &CallEdge :
FS->calls()) {
3100 if (!CallEdge.second.hasTailCall())
3102 if (CallEdge.first == ProfiledCallee) {
3103 if (FoundSingleCalleeChain) {
3104 FoundMultipleCalleeChains =
true;
3107 FoundSingleCalleeChain =
true;
3108 FoundProfiledCalleeCount++;
3109 FoundProfiledCalleeDepth +=
Depth;
3110 if (
Depth > FoundProfiledCalleeMaxDepth)
3111 FoundProfiledCalleeMaxDepth =
Depth;
3112 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3114 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3115 FSToVIMap[
FS] = FSVI;
3116 }
else if (findProfiledCalleeThroughTailCalls(
3117 ProfiledCallee, CallEdge.first,
Depth + 1,
3118 FoundCalleeChain, FoundMultipleCalleeChains)) {
3121 assert(!FoundMultipleCalleeChains);
3122 if (FoundSingleCalleeChain) {
3123 FoundMultipleCalleeChains =
true;
3126 FoundSingleCalleeChain =
true;
3127 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3129 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3130 FSToVIMap[
FS] = FSVI;
3131 }
else if (FoundMultipleCalleeChains)
3136 return FoundSingleCalleeChain;
3139const FunctionSummary *
3140IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3142 if (
Callee.getSummaryList().empty())
3147bool IndexCallsiteContextGraph::calleeMatchesFunc(
3148 IndexCall &
Call,
const FunctionSummary *Func,
3149 const FunctionSummary *CallerFunc,
3150 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3154 AliasSummary *Alias =
3155 Callee.getSummaryList().empty()
3158 assert(FSToVIMap.count(Func));
3159 auto FuncVI = FSToVIMap[
Func];
3160 if (Callee == FuncVI ||
3175 bool FoundMultipleCalleeChains =
false;
3176 if (!findProfiledCalleeThroughTailCalls(
3177 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3178 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3179 <<
" from " << FSToVIMap[CallerFunc]
3180 <<
" that actually called " << Callee
3181 << (FoundMultipleCalleeChains
3182 ?
" (found multiple possible chains)"
3185 if (FoundMultipleCalleeChains)
3186 FoundProfiledCalleeNonUniquelyCount++;
3193bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3196 return Callee1 == Callee2;
3199template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3200void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3206template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3207void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3208 raw_ostream &OS)
const {
3209 OS <<
"Node " <<
this <<
"\n";
3213 OS <<
" (recursive)";
3215 if (!MatchingCalls.empty()) {
3216 OS <<
"\tMatchingCalls:\n";
3217 for (
auto &MatchingCall : MatchingCalls) {
3219 MatchingCall.print(OS);
3223 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3225 OS <<
"\tContextIds:";
3227 auto ContextIds = getContextIds();
3228 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3229 std::sort(SortedIds.begin(), SortedIds.end());
3230 for (
auto Id : SortedIds)
3233 OS <<
"\tCalleeEdges:\n";
3234 for (
auto &
Edge : CalleeEdges)
3235 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3237 OS <<
"\tCallerEdges:\n";
3238 for (
auto &
Edge : CallerEdges)
3239 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3241 if (!Clones.empty()) {
3244 for (
auto *
C : Clones)
3245 OS <<
LS <<
C <<
" NodeId: " <<
C->NodeId;
3247 }
else if (CloneOf) {
3248 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3252template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3253void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3259template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3260void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3261 raw_ostream &OS)
const {
3262 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3263 << (IsBackedge ?
" (BE)" :
"")
3265 OS <<
" ContextIds:";
3266 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3267 std::sort(SortedIds.begin(), SortedIds.end());
3268 for (
auto Id : SortedIds)
3272template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3273void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3277template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3278void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3279 raw_ostream &OS)
const {
3280 OS <<
"Callsite Context Graph:\n";
3281 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3283 if (
Node->isRemoved())
3290template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3291void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3292 raw_ostream &OS)
const {
3293 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3295 if (
Node->isRemoved())
3297 if (!
Node->IsAllocation)
3299 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3300 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3301 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3302 std::sort(SortedIds.begin(), SortedIds.end());
3303 for (
auto Id : SortedIds) {
3304 auto TypeI = ContextIdToAllocationType.find(Id);
3305 assert(TypeI != ContextIdToAllocationType.end());
3306 auto CSI = ContextIdToContextSizeInfos.find(Id);
3307 if (CSI != ContextIdToContextSizeInfos.end()) {
3308 for (
auto &
Info : CSI->second) {
3309 OS <<
"MemProf hinting: "
3311 <<
" full allocation context " <<
Info.FullStackId
3312 <<
" with total size " <<
Info.TotalSize <<
" is "
3314 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3316 <<
" due to cold byte percent";
3318 OS <<
" (context id " <<
Id <<
")";
3326template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3327void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3328 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3331 for (
auto &
Edge :
Node->CallerEdges)
3336template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3338 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3339 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3341 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3357 return G->NodeOwner.begin()->get();
3360 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3361 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3380template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3394 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3400 std::string LabelString =
3401 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3404 LabelString +=
"\n";
3405 if (
Node->hasCall()) {
3406 auto Func =
G->NodeToCallingFunc.find(
Node);
3407 assert(Func !=
G->NodeToCallingFunc.end());
3409 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3410 for (
auto &MatchingCall :
Node->MatchingCalls) {
3411 LabelString +=
"\n";
3412 LabelString +=
G->getLabel(Func->second, MatchingCall.call(),
3413 MatchingCall.cloneNo());
3416 LabelString +=
"null call";
3417 if (
Node->Recursive)
3418 LabelString +=
" (recursive)";
3420 LabelString +=
" (external)";
3426 auto ContextIds =
Node->getContextIds();
3430 bool Highlight =
false;
3439 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3440 getContextIds(ContextIds) +
"\"")
3444 AttributeString +=
",fontsize=\"30\"";
3446 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3448 if (
Node->CloneOf) {
3449 AttributeString +=
",color=\"blue\"";
3450 AttributeString +=
",style=\"filled,bold,dashed\"";
3452 AttributeString +=
",style=\"filled\"";
3453 return AttributeString;
3458 auto &Edge = *(ChildIter.getCurrent());
3463 bool Highlight =
false;
3472 auto Color = getColor(Edge->AllocTypes, Highlight);
3473 std::string AttributeString =
3474 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3476 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3479 if (Edge->IsBackedge)
3480 AttributeString +=
",style=\"dotted\"";
3483 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3484 return AttributeString;
3490 if (
Node->isRemoved())
3503 std::string IdString =
"ContextIds:";
3504 if (ContextIds.
size() < 100) {
3505 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3506 std::sort(SortedIds.begin(), SortedIds.end());
3507 for (
auto Id : SortedIds)
3508 IdString += (
" " +
Twine(Id)).str();
3510 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3515 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3521 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3523 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3524 if (AllocTypes == (uint8_t)AllocationType::Cold)
3525 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3527 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3528 return Highlight ?
"magenta" :
"mediumorchid1";
3532 static std::string getNodeId(NodeRef Node) {
3533 std::stringstream SStream;
3534 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3535 std::string
Result = SStream.str();
3544template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3549template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3550void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3551 std::string Label)
const {
3556template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3557typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3558CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3559 const std::shared_ptr<ContextEdge> &
Edge,
3560 DenseSet<uint32_t> ContextIdsToMove) {
3562 assert(NodeToCallingFunc.count(Node));
3563 ContextNode *Clone =
3564 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3565 Node->addClone(Clone);
3566 Clone->MatchingCalls =
Node->MatchingCalls;
3567 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3572template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3573void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3574 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3575 ContextNode *NewCallee,
bool NewClone,
3576 DenseSet<uint32_t> ContextIdsToMove) {
3579 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3581 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3583 ContextNode *OldCallee =
Edge->Callee;
3587 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3591 if (ContextIdsToMove.
empty())
3592 ContextIdsToMove =
Edge->getContextIds();
3596 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3599 NewCallee->AllocTypes |=
Edge->AllocTypes;
3601 if (ExistingEdgeToNewCallee) {
3604 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3605 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3606 assert(
Edge->ContextIds == ContextIdsToMove);
3607 removeEdgeFromGraph(
Edge.get());
3610 Edge->Callee = NewCallee;
3611 NewCallee->CallerEdges.push_back(
Edge);
3613 OldCallee->eraseCallerEdge(
Edge.get());
3620 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3621 if (ExistingEdgeToNewCallee) {
3624 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3625 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3628 auto NewEdge = std::make_shared<ContextEdge>(
3629 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3630 Edge->Caller->CalleeEdges.push_back(NewEdge);
3631 NewCallee->CallerEdges.push_back(NewEdge);
3635 NewCallee->AllocTypes |= CallerEdgeAllocType;
3637 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3642 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3643 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3647 if (CalleeToUse == OldCallee) {
3651 if (EdgeIsRecursive) {
3655 CalleeToUse = NewCallee;
3659 DenseSet<uint32_t> EdgeContextIdsToMove =
3661 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3662 OldCalleeEdge->AllocTypes =
3663 computeAllocType(OldCalleeEdge->getContextIds());
3670 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3671 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3672 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3676 auto NewEdge = std::make_shared<ContextEdge>(
3677 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3678 EdgeContextIdsToMove);
3679 NewCallee->CalleeEdges.push_back(NewEdge);
3680 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3684 OldCallee->AllocTypes = OldCallee->computeAllocType();
3686 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3687 OldCallee->emptyContextIds());
3691 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3694 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3700template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3701void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3702 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3703 ContextNode *NewCaller) {
3704 auto *OldCallee =
Edge->Callee;
3705 auto *NewCallee = OldCallee;
3708 bool Recursive =
Edge->Caller ==
Edge->Callee;
3710 NewCallee = NewCaller;
3712 ContextNode *OldCaller =
Edge->Caller;
3713 OldCaller->eraseCalleeEdge(
Edge.get());
3717 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3719 if (ExistingEdgeToNewCaller) {
3722 ExistingEdgeToNewCaller->getContextIds().insert_range(
3723 Edge->getContextIds());
3724 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3725 Edge->ContextIds.clear();
3726 Edge->AllocTypes = (uint8_t)AllocationType::None;
3727 OldCallee->eraseCallerEdge(
Edge.get());
3730 Edge->Caller = NewCaller;
3731 NewCaller->CalleeEdges.push_back(
Edge);
3733 assert(NewCallee == NewCaller);
3736 Edge->Callee = NewCallee;
3737 NewCallee->CallerEdges.push_back(
Edge);
3738 OldCallee->eraseCallerEdge(
Edge.get());
3744 NewCaller->AllocTypes |=
Edge->AllocTypes;
3751 bool IsNewNode = NewCaller->CallerEdges.empty();
3760 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3761 auto OldCallerCaller = OldCallerEdge->Caller;
3765 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3766 if (OldCaller == OldCallerCaller) {
3767 OldCallerCaller = NewCaller;
3773 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3774 OldCallerEdge->AllocTypes =
3775 computeAllocType(OldCallerEdge->getContextIds());
3780 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3784 if (ExistingCallerEdge) {
3785 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3786 ExistingCallerEdge->AllocTypes |=
3787 computeAllocType(EdgeContextIdsToMove);
3790 auto NewEdge = std::make_shared<ContextEdge>(
3791 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3792 EdgeContextIdsToMove);
3793 NewCaller->CallerEdges.push_back(NewEdge);
3794 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3799 OldCaller->AllocTypes = OldCaller->computeAllocType();
3801 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3802 OldCaller->emptyContextIds());
3806 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3809 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3815template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3816void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3817 recursivelyRemoveNoneTypeCalleeEdges(
3818 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3823 removeNoneTypeCalleeEdges(Node);
3825 for (
auto *Clone :
Node->Clones)
3826 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3830 auto CallerEdges =
Node->CallerEdges;
3831 for (
auto &
Edge : CallerEdges) {
3833 if (
Edge->isRemoved()) {
3837 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3842template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3843void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3848 DenseSet<const ContextNode *> Visited;
3849 DenseSet<const ContextNode *> CurrentStack;
3850 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3852 if (
Node->isRemoved())
3855 if (!
Node->CallerEdges.empty())
3857 markBackedges(Node, Visited, CurrentStack);
3863template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3864void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3865 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3866 DenseSet<const ContextNode *> &CurrentStack) {
3867 auto I = Visited.
insert(Node);
3871 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3872 auto *
Callee = CalleeEdge->Callee;
3873 if (Visited.
count(Callee)) {
3876 if (CurrentStack.
count(Callee))
3877 CalleeEdge->IsBackedge =
true;
3880 CurrentStack.
insert(Callee);
3881 markBackedges(Callee, Visited, CurrentStack);
3882 CurrentStack.
erase(Callee);
3886template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3887void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3888 DenseSet<const ContextNode *> Visited;
3889 for (
auto &Entry : AllocationCallToContextNodeMap) {
3891 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3894 for (
auto &Entry : AllocationCallToContextNodeMap)
3895 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3908template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3909void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3910 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3911 const DenseSet<uint32_t> &AllocContextIds) {
3921 if (!
Node->hasCall())
3940 auto CallerEdges =
Node->CallerEdges;
3941 for (
auto &
Edge : CallerEdges) {
3943 if (
Edge->isRemoved()) {
3949 if (
Edge->IsBackedge) {
3956 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3957 identifyClones(
Edge->Caller, Visited, AllocContextIds);
3980 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3984 [&](
const std::shared_ptr<ContextEdge> &
A,
3985 const std::shared_ptr<ContextEdge> &
B) {
3988 if (A->ContextIds.empty())
3994 if (B->ContextIds.empty())
3997 if (A->AllocTypes == B->AllocTypes)
4000 return *A->ContextIds.begin() < *B->ContextIds.begin();
4001 return AllocTypeCloningPriority[A->AllocTypes] <
4002 AllocTypeCloningPriority[B->AllocTypes];
4005 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4007 DenseSet<uint32_t> RecursiveContextIds;
4012 DenseSet<uint32_t> AllCallerContextIds;
4013 for (
auto &CE :
Node->CallerEdges) {
4016 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4017 for (
auto Id :
CE->getContextIds())
4018 if (!AllCallerContextIds.
insert(Id).second)
4019 RecursiveContextIds.
insert(Id);
4029 auto CallerEdges =
Node->CallerEdges;
4030 for (
auto &CallerEdge : CallerEdges) {
4032 if (CallerEdge->isRemoved()) {
4036 assert(CallerEdge->Callee == Node);
4045 if (!CallerEdge->Caller->hasCall())
4050 auto CallerEdgeContextsForAlloc =
4052 if (!RecursiveContextIds.
empty())
4053 CallerEdgeContextsForAlloc =
4055 if (CallerEdgeContextsForAlloc.empty())
4058 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4062 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4063 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4064 for (
auto &CalleeEdge :
Node->CalleeEdges)
4065 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4066 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4082 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4083 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4084 if (!CallerEdge->IsBackedge &&
4085 allocTypeToUse(CallerAllocTypeForAlloc) ==
4086 allocTypeToUse(
Node->AllocTypes) &&
4087 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4088 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4092 if (CallerEdge->IsBackedge) {
4096 DeferredBackedges++;
4109 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4110 !Visited.
count(CallerEdge->Caller)) {
4111 const auto OrigIdCount = CallerEdge->getContextIds().size();
4114 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4115 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4119 bool UpdatedEdge =
false;
4120 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4121 for (
auto E :
Node->CallerEdges) {
4123 if (
E->Caller->CloneOf != CallerEdge->Caller)
4127 auto CallerEdgeContextsForAllocNew =
4129 if (CallerEdgeContextsForAllocNew.empty())
4139 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4149 if (CallerEdge->isRemoved())
4159 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4160 if (CallerEdgeContextsForAlloc.empty())
4165 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4166 CalleeEdgeAllocTypesForCallerEdge.clear();
4167 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4168 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4169 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4175 ContextNode *Clone =
nullptr;
4176 for (
auto *CurClone :
Node->Clones) {
4177 if (allocTypeToUse(CurClone->AllocTypes) !=
4178 allocTypeToUse(CallerAllocTypeForAlloc))
4185 assert(!BothSingleAlloc ||
4186 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4192 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4193 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4201 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4202 CallerEdgeContextsForAlloc);
4204 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4207 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4214 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4220void ModuleCallsiteContextGraph::updateAllocationCall(
4225 "memprof", AllocTypeString);
4228 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4229 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4231 <<
" marked with memprof allocation attribute "
4232 <<
ore::NV(
"Attribute", AllocTypeString));
4235void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4239 assert(AI->Versions.size() >
Call.cloneNo());
4244ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4246 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4247 return AllocationType::None;
4248 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4249 ? AllocationType::Cold
4250 : AllocationType::NotCold;
4254IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4256 assert(AI->Versions.size() >
Call.cloneNo());
4260void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4261 FuncInfo CalleeFunc) {
4262 auto *CurF = getCalleeFunc(CallerCall.call());
4263 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4270 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4272 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4274 MismatchedCloneAssignments++;
4277 if (NewCalleeCloneNo > 0)
4278 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4279 OREGetter(CallerCall.call()->getFunction())
4280 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4281 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4282 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4283 <<
" assigned to call function clone "
4284 <<
ore::NV(
"Callee", CalleeFunc.func()));
4287void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4288 FuncInfo CalleeFunc) {
4291 "Caller cannot be an allocation which should not have profiled calls");
4292 assert(CI->Clones.size() > CallerCall.cloneNo());
4293 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4294 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4299 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4301 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4303 MismatchedCloneAssignments++;
4305 CurCalleeCloneNo = NewCalleeCloneNo;
4317 SP->replaceLinkageName(MDName);
4321 TempDISubprogram NewDecl = Decl->
clone();
4322 NewDecl->replaceLinkageName(MDName);
4326CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4328ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4329 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4330 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4335 assert(!
Func.func()->getParent()->getFunction(Name));
4336 NewFunc->setName(Name);
4338 for (
auto &Inst : CallsWithMetadataInFunc) {
4340 assert(Inst.cloneNo() == 0);
4343 OREGetter(
Func.func())
4344 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4345 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4346 return {NewFunc, CloneNo};
4349CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4350 IndexCall>::FuncInfo
4351IndexCallsiteContextGraph::cloneFunctionForCallsite(
4352 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4353 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4367 for (
auto &Inst : CallsWithMetadataInFunc) {
4369 assert(Inst.cloneNo() == 0);
4371 assert(AI->Versions.size() == CloneNo);
4374 AI->Versions.push_back(0);
4377 assert(CI && CI->Clones.size() == CloneNo);
4380 CI->Clones.push_back(0);
4382 CallMap[Inst] = {Inst.call(), CloneNo};
4384 return {
Func.func(), CloneNo};
4401template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4402void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4408 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4409 for (
auto &Entry : AllocationCallToContextNodeMap) {
4411 for (
auto Id :
Node->getContextIds())
4412 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4413 for (
auto *Clone :
Node->Clones) {
4414 for (
auto Id : Clone->getContextIds())
4415 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4422 DenseSet<const ContextNode *> Visited;
4423 for (
auto &Entry : AllocationCallToContextNodeMap) {
4426 mergeClones(Node, Visited, ContextIdToAllocationNode);
4432 auto Clones =
Node->Clones;
4433 for (
auto *Clone : Clones)
4434 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4438 dbgs() <<
"CCG after merging:\n";
4442 exportToDot(
"aftermerge");
4450template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4451void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4452 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4453 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4463 bool FoundUnvisited =
true;
4465 while (FoundUnvisited) {
4467 FoundUnvisited =
false;
4470 auto CallerEdges =
Node->CallerEdges;
4471 for (
auto CallerEdge : CallerEdges) {
4473 if (CallerEdge->Callee != Node)
4478 FoundUnvisited =
true;
4479 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4483 TotalMergeInvokes++;
4484 TotalMergeIters += Iters;
4485 if (Iters > MaxMergeIters)
4486 MaxMergeIters = Iters;
4489 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4492template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4493void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4494 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4495 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4497 if (
Node->emptyContextIds())
4502 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4503 OrigNodeToCloneEdges;
4504 for (
const auto &
E :
Node->CalleeEdges) {
4509 OrigNodeToCloneEdges[
Base].push_back(
E);
4515 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4516 const std::shared_ptr<ContextEdge> &
B) {
4517 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4518 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4519 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4521 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4525 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4530 for (
auto Entry : OrigNodeToCloneEdges) {
4533 auto &CalleeEdges =
Entry.second;
4534 auto NumCalleeClones = CalleeEdges.size();
4536 if (NumCalleeClones == 1)
4547 DenseSet<ContextNode *> OtherCallersToShareMerge;
4548 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4549 OtherCallersToShareMerge);
4554 ContextNode *MergeNode =
nullptr;
4555 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4556 for (
auto CalleeEdge : CalleeEdges) {
4557 auto *OrigCallee = CalleeEdge->Callee;
4563 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4564 MergeNode = OrigCallee;
4565 NonNewMergedNodes++;
4572 if (!OtherCallersToShareMerge.
empty()) {
4573 bool MoveAllCallerEdges =
true;
4574 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4575 if (CalleeCallerE == CalleeEdge)
4577 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4578 MoveAllCallerEdges =
false;
4584 if (MoveAllCallerEdges) {
4585 MergeNode = OrigCallee;
4586 NonNewMergedNodes++;
4593 assert(MergeNode != OrigCallee);
4594 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4597 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4602 if (!OtherCallersToShareMerge.
empty()) {
4606 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4607 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4608 if (CalleeCallerE == CalleeEdge)
4610 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4612 CallerToMoveCount[CalleeCallerE->Caller]++;
4613 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4617 removeNoneTypeCalleeEdges(OrigCallee);
4618 removeNoneTypeCalleeEdges(MergeNode);
4636template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4637void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4638 findOtherCallersToShareMerge(
4640 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4641 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4642 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4643 auto NumCalleeClones = CalleeEdges.size();
4646 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4649 unsigned PossibleOtherCallerNodes = 0;
4653 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4659 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4660 for (
auto CalleeEdge : CalleeEdges) {
4661 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4664 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4665 if (CalleeCallerEdges->Caller == Node) {
4666 assert(CalleeCallerEdges == CalleeEdge);
4669 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4672 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4674 PossibleOtherCallerNodes++;
4678 for (
auto Id : CalleeEdge->getContextIds()) {
4679 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4683 MissingAllocForContextId++;
4686 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4693 for (
auto CalleeEdge : CalleeEdges) {
4694 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4696 if (!PossibleOtherCallerNodes)
4698 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4700 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4702 if (CalleeCallerE == CalleeEdge)
4706 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4711 for (
auto Id : CalleeCallerE->getContextIds()) {
4712 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4717 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4718 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4719 PossibleOtherCallerNodes--;
4726 if (!PossibleOtherCallerNodes)
4731 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4732 if (
Count != NumCalleeClones)
4734 OtherCallersToShareMerge.
insert(OtherCaller);
4779template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4780bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4787 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4791 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4792 const FuncInfo &CalleeFunc) {
4794 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4798 struct FuncCloneInfo {
4803 DenseMap<CallInfo, CallInfo> CallMap;
4831 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4832 UnassignedCallClones;
4836 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4837 FuncInfo OrigFunc(Func);
4842 std::vector<FuncCloneInfo> FuncCloneInfos;
4843 for (
auto &
Call : CallsWithMetadata) {
4844 ContextNode *
Node = getNodeForInst(
Call);
4848 if (!Node ||
Node->Clones.empty())
4851 "Not having a call should have prevented cloning");
4855 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4859 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4861 ContextNode *CallsiteClone,
4864 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4866 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4867 DenseMap<CallInfo, CallInfo> &CallMap =
4868 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4869 CallInfo CallClone(
Call);
4870 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4871 CallClone = It->second;
4872 CallsiteClone->setCall(CallClone);
4874 for (
auto &MatchingCall :
Node->MatchingCalls) {
4875 CallInfo CallClone(MatchingCall);
4876 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4877 CallClone = It->second;
4879 MatchingCall = CallClone;
4887 auto MoveEdgeToNewCalleeCloneAndSetUp =
4888 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4889 ContextNode *OrigCallee =
Edge->Callee;
4890 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4891 removeNoneTypeCalleeEdges(NewClone);
4892 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4896 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4897 RecordCalleeFuncOfCallsite(
4898 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4905 std::deque<ContextNode *> ClonesWorklist;
4907 if (!
Node->emptyContextIds())
4908 ClonesWorklist.push_back(Node);
4914 unsigned NodeCloneCount = 0;
4915 while (!ClonesWorklist.empty()) {
4916 ContextNode *Clone = ClonesWorklist.front();
4917 ClonesWorklist.pop_front();
4926 if (FuncCloneInfos.size() < NodeCloneCount) {
4928 if (NodeCloneCount == 1) {
4933 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4934 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4938 FuncCloneInfos.push_back(
4939 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4940 AssignCallsiteCloneToFuncClone(
4941 OrigFunc,
Call, Clone,
4942 AllocationCallToContextNodeMap.count(
Call));
4943 for (
auto &CE : Clone->CallerEdges) {
4945 if (!
CE->Caller->hasCall())
4947 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4957 FuncInfo PreviousAssignedFuncClone;
4959 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4960 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4962 bool CallerAssignedToCloneOfFunc =
false;
4963 if (EI != Clone->CallerEdges.end()) {
4964 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4965 PreviousAssignedFuncClone =
4966 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4967 CallerAssignedToCloneOfFunc =
true;
4972 DenseMap<CallInfo, CallInfo> NewCallMap;
4973 unsigned CloneNo = FuncCloneInfos.size();
4974 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4975 "should already exist in the map");
4976 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4977 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4978 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4979 FunctionClonesAnalysis++;
4985 if (!CallerAssignedToCloneOfFunc) {
4986 AssignCallsiteCloneToFuncClone(
4987 NewFuncClone,
Call, Clone,
4988 AllocationCallToContextNodeMap.count(
Call));
4989 for (
auto &CE : Clone->CallerEdges) {
4991 if (!
CE->Caller->hasCall())
4993 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5005 auto CallerEdges = Clone->CallerEdges;
5006 for (
auto CE : CallerEdges) {
5008 if (
CE->isRemoved()) {
5014 if (!
CE->Caller->hasCall())
5017 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5021 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5022 PreviousAssignedFuncClone)
5025 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5038 auto CalleeEdges =
CE->Caller->CalleeEdges;
5039 for (
auto CalleeEdge : CalleeEdges) {
5042 if (CalleeEdge->isRemoved()) {
5047 ContextNode *
Callee = CalleeEdge->Callee;
5051 if (Callee == Clone || !
Callee->hasCall())
5056 if (Callee == CalleeEdge->Caller)
5058 ContextNode *NewClone =
5059 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5062 removeNoneTypeCalleeEdges(Callee);
5070 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5071 OrigCall.setCloneNo(0);
5072 DenseMap<CallInfo, CallInfo> &CallMap =
5073 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5075 CallInfo NewCall(CallMap[OrigCall]);
5077 NewClone->setCall(NewCall);
5079 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5080 CallInfo OrigMatchingCall(MatchingCall);
5081 OrigMatchingCall.setCloneNo(0);
5083 CallInfo NewCall(CallMap[OrigMatchingCall]);
5086 MatchingCall = NewCall;
5095 auto FindFirstAvailFuncClone = [&]() {
5100 for (
auto &CF : FuncCloneInfos) {
5101 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5102 return CF.FuncClone;
5105 "Expected an available func clone for this callsite clone");
5122 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5123 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5127 auto CloneCallerEdges = Clone->CallerEdges;
5128 for (
auto &
Edge : CloneCallerEdges) {
5132 if (
Edge->isRemoved())
5135 if (!
Edge->Caller->hasCall())
5139 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5140 FuncInfo FuncCloneCalledByCaller =
5141 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5151 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5152 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5160 (FuncCloneAssignedToCurCallsiteClone &&
5161 FuncCloneAssignedToCurCallsiteClone !=
5162 FuncCloneCalledByCaller)) {
5177 if (FuncCloneToNewCallsiteCloneMap.count(
5178 FuncCloneCalledByCaller)) {
5179 ContextNode *NewClone =
5180 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5181 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5183 removeNoneTypeCalleeEdges(NewClone);
5186 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5187 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5190 ClonesWorklist.push_back(NewClone);
5194 removeNoneTypeCalleeEdges(Clone);
5202 if (!FuncCloneAssignedToCurCallsiteClone) {
5203 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5205 AssignCallsiteCloneToFuncClone(
5206 FuncCloneCalledByCaller,
Call, Clone,
5207 AllocationCallToContextNodeMap.count(
Call));
5211 assert(FuncCloneAssignedToCurCallsiteClone ==
5212 FuncCloneCalledByCaller);
5221 if (!FuncCloneAssignedToCurCallsiteClone) {
5222 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5223 assert(FuncCloneAssignedToCurCallsiteClone);
5225 AssignCallsiteCloneToFuncClone(
5226 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5227 AllocationCallToContextNodeMap.count(
Call));
5229 assert(FuncCloneToCurNodeCloneMap
5230 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5232 RecordCalleeFuncOfCallsite(
Edge->Caller,
5233 FuncCloneAssignedToCurCallsiteClone);
5253 if (!FuncCloneAssignedToCurCallsiteClone) {
5254 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5255 assert(FuncCloneAssignedToCurCallsiteClone &&
5256 "No available func clone for this callsite clone");
5257 AssignCallsiteCloneToFuncClone(
5258 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5259 AllocationCallToContextNodeMap.contains(
Call));
5264 for (
const auto &PE :
Node->CalleeEdges)
5266 for (
const auto &CE :
Node->CallerEdges)
5268 for (
auto *Clone :
Node->Clones) {
5270 for (
const auto &PE : Clone->CalleeEdges)
5272 for (
const auto &CE : Clone->CallerEdges)
5278 if (FuncCloneInfos.size() < 2)
5284 for (
auto &
Call : CallsWithMetadata) {
5285 ContextNode *
Node = getNodeForInst(
Call);
5286 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5292 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5296 DenseSet<unsigned> NodeCallClones;
5297 for (
auto *
C :
Node->Clones)
5298 NodeCallClones.
insert(
C->Call.cloneNo());
5301 for (
auto &FC : FuncCloneInfos) {
5306 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5311 auto &CallVector = UnassignedCallClones[
Node][
I];
5312 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5313 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5314 CallInfo CallClone = It->second;
5315 CallVector.push_back(CallClone);
5319 assert(
false &&
"Expected to find call in CallMap");
5322 for (
auto &MatchingCall :
Node->MatchingCalls) {
5323 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5324 CallInfo CallClone = It->second;
5325 CallVector.push_back(CallClone);
5329 assert(
false &&
"Expected to find call in CallMap");
5337 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5339 auto UpdateCalls = [&](ContextNode *
Node,
5340 DenseSet<const ContextNode *> &Visited,
5341 auto &&UpdateCalls) {
5342 auto Inserted = Visited.insert(Node);
5346 for (
auto *Clone :
Node->Clones)
5347 UpdateCalls(Clone, Visited, UpdateCalls);
5349 for (
auto &
Edge :
Node->CallerEdges)
5350 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5354 if (!
Node->hasCall() ||
Node->emptyContextIds())
5357 if (
Node->IsAllocation) {
5358 auto AT = allocTypeToUse(
Node->AllocTypes);
5364 !ContextIdToContextSizeInfos.empty()) {
5365 uint64_t TotalCold = 0;
5367 for (
auto Id :
Node->getContextIds()) {
5368 auto TypeI = ContextIdToAllocationType.find(Id);
5369 assert(TypeI != ContextIdToAllocationType.end());
5370 auto CSI = ContextIdToContextSizeInfos.find(Id);
5371 if (CSI != ContextIdToContextSizeInfos.end()) {
5372 for (
auto &
Info : CSI->second) {
5374 if (TypeI->second == AllocationType::Cold)
5375 TotalCold +=
Info.TotalSize;
5380 AT = AllocationType::Cold;
5382 updateAllocationCall(
Node->Call, AT);
5387 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5390 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5391 updateCall(
Node->Call, CalleeFunc);
5393 for (
auto &
Call :
Node->MatchingCalls)
5394 updateCall(
Call, CalleeFunc);
5398 if (!UnassignedCallClones.
contains(Node))
5400 DenseSet<unsigned> NodeCallClones;
5401 for (
auto *
C :
Node->Clones)
5402 NodeCallClones.
insert(
C->Call.cloneNo());
5404 auto &ClonedCalls = UnassignedCallClones[
Node];
5405 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5409 if (NodeCallClones.
contains(CloneNo))
5412 for (
auto &
Call : CallVector)
5413 updateCall(
Call, CalleeFunc);
5422 DenseSet<const ContextNode *> Visited;
5423 for (
auto &Entry : AllocationCallToContextNodeMap)
5424 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5435 for (
auto &SN : FS->callsites()) {
5440 SN.Clones.size() >
I &&
5441 "Callsite summary has fewer entries than other summaries in function");
5442 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5449 for (
auto &AN : FS->allocs()) {
5453 assert(AN.Versions.size() >
I &&
5454 "Alloc summary has fewer entries than other summaries in function");
5455 if (AN.Versions.size() <=
I ||
5472 NewGV->takeName(DeclGV);
5479 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5480 if (!FuncToAliasMap.count(&
F))
5482 for (
auto *
A : FuncToAliasMap[&
F]) {
5484 auto *PrevA = M.getNamedAlias(AliasName);
5486 A->getType()->getPointerAddressSpace(),
5487 A->getLinkage(), AliasName, NewF);
5488 NewA->copyAttributesFrom(
A);
5490 TakeDeclNameAndReplace(PrevA, NewA);
5499 FunctionsClonedThinBackend++;
5516 for (
unsigned I = 1;
I < NumClones;
I++) {
5517 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5524 FunctionCloneDuplicatesThinBackend++;
5525 auto *Func = HashToFunc[Hash];
5526 if (Func->hasAvailableExternallyLinkage()) {
5532 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5534 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5537 auto *PrevF = M.getFunction(Name);
5540 TakeDeclNameAndReplace(PrevF, Alias);
5542 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5545 CloneFuncAliases(Func,
I);
5549 HashToFunc[Hash] = NewF;
5550 FunctionClonesThinBackend++;
5553 for (
auto &BB : *NewF) {
5554 for (
auto &Inst : BB) {
5555 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5556 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5561 TakeDeclNameAndReplace(PrevF, NewF);
5563 NewF->setName(Name);
5566 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5569 CloneFuncAliases(NewF,
I);
5578 const Function *CallingFunc =
nullptr) {
5597 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5603 if (!SrcFileMD &&
F.isDeclaration()) {
5607 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5612 assert(SrcFileMD || OrigName ==
F.getName());
5614 StringRef SrcFile = M.getSourceFileName();
5626 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5627 F.getName().contains(
'.')) {
5628 OrigName =
F.getName().rsplit(
'.').first;
5637 assert(TheFnVI ||
F.isDeclaration());
5641bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5643 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5644 Symtab = std::make_unique<InstrProfSymtab>();
5655 if (
Error E = Symtab->create(M,
true,
false)) {
5656 std::string SymtabFailure =
toString(std::move(
E));
5657 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5670 auto MIBIter = AllocNode.
MIBs.begin();
5671 for (
auto &MDOp : MemProfMD->
operands()) {
5673 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5678 auto ContextIterBegin =
5682 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5684 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5689 if (LastStackContextId == *ContextIter)
5691 LastStackContextId = *ContextIter;
5692 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5702bool MemProfContextDisambiguation::applyImport(
Module &M) {
5709 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5711 for (
auto &
A :
M.aliases()) {
5712 auto *Aliasee =
A.getAliaseeObject();
5714 FuncToAliasMap[
F].insert(&
A);
5717 if (!initializeIndirectCallPromotionInfo(M))
5724 OptimizationRemarkEmitter ORE(&
F);
5727 bool ClonesCreated =
false;
5728 unsigned NumClonesCreated = 0;
5729 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5739 if (ClonesCreated) {
5740 assert(NumClonesCreated == NumClones);
5747 ClonesCreated =
true;
5748 NumClonesCreated = NumClones;
5751 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5752 Function *CalledFunction, FunctionSummary *
FS) {
5754 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5766 if (CalledFunction != CB->getCalledOperand() &&
5767 (!GA || CalledFunction != GA->getAliaseeObject())) {
5768 SkippedCallsCloning++;
5774 auto CalleeOrigName = CalledFunction->getName();
5775 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5778 if (J > 0 && VMaps[J - 1]->
empty())
5782 if (!StackNode.
Clones[J])
5784 auto NewF =
M.getOrInsertFunction(
5786 CalledFunction->getFunctionType());
5800 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5801 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5803 <<
" assigned to call function clone "
5804 <<
ore::NV(
"Callee", NewF.getCallee()));
5818 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5822 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5824 "enable-import-metadata is needed to emit thinlto_src_module");
5825 StringRef SrcModule =
5828 if (GVS->modulePath() == SrcModule) {
5829 GVSummary = GVS.get();
5833 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5843 if (
FS->allocs().empty() &&
FS->callsites().empty())
5846 auto SI =
FS->callsites().begin();
5847 auto AI =
FS->allocs().begin();
5852 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5855 for (
auto CallsiteIt =
FS->callsites().rbegin();
5856 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5857 auto &Callsite = *CallsiteIt;
5861 if (!Callsite.StackIdIndices.empty())
5863 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5872 for (
auto &BB :
F) {
5873 for (
auto &
I : BB) {
5879 auto *CalledValue = CB->getCalledOperand();
5880 auto *CalledFunction = CB->getCalledFunction();
5881 if (CalledValue && !CalledFunction) {
5882 CalledValue = CalledValue->stripPointerCasts();
5889 assert(!CalledFunction &&
5890 "Expected null called function in callsite for alias");
5894 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5895 I.getMetadata(LLVMContext::MD_callsite));
5896 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5902 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5903 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5904 ? AllocTypeColdThinBackend++
5905 : AllocTypeNotColdThinBackend++;
5906 OrigAllocsThinBackend++;
5907 AllocVersionsThinBackend++;
5908 if (!MaxAllocVersionsThinBackend)
5909 MaxAllocVersionsThinBackend = 1;
5916 auto &AllocNode = *(AI++);
5924 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5926 OrigAllocsThinBackend++;
5927 AllocVersionsThinBackend += AllocNode.Versions.size();
5928 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5929 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5939 if (AllocNode.Versions.size() == 1 &&
5942 AllocationType::NotCold ||
5944 AllocationType::None);
5945 UnclonableAllocsThinBackend++;
5951 return Type == ((uint8_t)AllocationType::NotCold |
5952 (uint8_t)AllocationType::Cold);
5956 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5959 if (J > 0 && VMaps[J - 1]->
empty())
5962 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5965 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5966 : AllocTypeNotColdThinBackend++;
5981 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5982 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5984 <<
" marked with memprof allocation attribute "
5985 <<
ore::NV(
"Attribute", AllocTypeString));
5987 }
else if (!CallsiteContext.empty()) {
5988 if (!CalledFunction) {
5992 assert(!CI || !CI->isInlineAsm());
6002 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6008 CloneFuncIfNeeded(NumClones, FS);
6013 assert(SI !=
FS->callsites().end());
6014 auto &StackNode = *(
SI++);
6020 for (
auto StackId : CallsiteContext) {
6022 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6028 CloneCallsite(StackNode, CB, CalledFunction, FS);
6030 }
else if (CB->isTailCall() && CalledFunction) {
6033 ValueInfo CalleeVI =
6035 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6036 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6037 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6038 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6045 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6055 for (
auto &BB :
F) {
6056 for (
auto &
I : BB) {
6059 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6060 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6068unsigned MemProfContextDisambiguation::recordICPInfo(
6073 uint32_t NumCandidates;
6074 uint64_t TotalCount;
6075 auto CandidateProfileData =
6076 ICallAnalysis->getPromotionCandidatesForInstruction(
6078 if (CandidateProfileData.empty())
6084 bool ICPNeeded =
false;
6085 unsigned NumClones = 0;
6086 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6087 for (
const auto &Candidate : CandidateProfileData) {
6089 auto CalleeValueInfo =
6091 ImportSummary->getValueInfo(Candidate.Value);
6094 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6096 auto &StackNode = *(
SI++);
6101 [](
unsigned CloneNo) { return CloneNo != 0; });
6111 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6112 TotalCount, CallsiteInfoStartIndex});
6116void MemProfContextDisambiguation::performICP(
6118 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6120 OptimizationRemarkEmitter &ORE) {
6127 for (
auto &
Info : ICallAnalysisInfo) {
6130 auto TotalCount =
Info.TotalCount;
6131 unsigned NumPromoted = 0;
6132 unsigned NumClones = 0;
6134 for (
auto &Candidate :
Info.CandidateProfileData) {
6145 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6146 if (TargetFunction ==
nullptr ||
6154 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6155 <<
"Memprof cannot promote indirect call: target with md5sum "
6156 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6165 const char *Reason =
nullptr;
6168 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6169 <<
"Memprof cannot promote indirect call to "
6170 <<
ore::NV(
"TargetFunction", TargetFunction)
6171 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6182 CallBase *CBClone = CB;
6183 for (
unsigned J = 0; J < NumClones; J++) {
6186 if (J > 0 && VMaps[J - 1]->
empty())
6196 TotalCount, isSamplePGO, &ORE);
6197 auto *TargetToUse = TargetFunction;
6200 if (StackNode.
Clones[J]) {
6219 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6221 <<
" promoted and assigned to call function clone "
6222 <<
ore::NV(
"Callee", TargetToUse));
6226 TotalCount -= Candidate.Count;
6231 CallBase *CBClone = CB;
6232 for (
unsigned J = 0; J < NumClones; J++) {
6235 if (J > 0 && VMaps[J - 1]->
empty())
6241 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6244 if (TotalCount != 0)
6246 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6247 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6252template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6253bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6255 dbgs() <<
"CCG before cloning:\n";
6259 exportToDot(
"postbuild");
6272 dbgs() <<
"CCG after cloning:\n";
6276 exportToDot(
"cloned");
6278 bool Changed = assignFunctions();
6281 dbgs() <<
"CCG after assigning function clones:\n";
6285 exportToDot(
"clonefuncassign");
6288 printTotalSizes(
errs());
6293bool MemProfContextDisambiguation::processModule(
6295 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6300 return applyImport(M);
6313 ModuleCallsiteContextGraph CCG(M, OREGetter);
6314 return CCG.process();
6319 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6324 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6328 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6332 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6333 "-memprof-dot-context-id");
6334 if (ImportSummary) {
6344 auto ReadSummaryFile =
6346 if (!ReadSummaryFile) {
6353 if (!ImportSummaryForTestingOrErr) {
6359 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6360 ImportSummary = ImportSummaryForTesting.get();
6369 if (!processModule(M, OREGetter))
6386 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6402 for (
auto &BB :
F) {
6403 for (
auto &
I : BB) {
6407 if (CI->hasFnAttr(
"memprof")) {
6408 CI->removeFnAttr(
"memprof");
6411 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6412 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6418 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6419 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...