#include <unordered_map>

#define DEBUG_TYPE "memprof-context-disambiguation"

STATISTIC(FunctionClonesAnalysis,
          "Number of function clones created during whole program analysis");
STATISTIC(FunctionClonesThinBackend,
          "Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
          "Number of functions that had clones created during ThinLTO backend");
STATISTIC(
    FunctionCloneDuplicatesThinBackend,
    "Number of function clone duplicates detected during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
                            "cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
                         "during whole program analysis");
STATISTIC(AllocTypeNotColdThinBackend,
          "Number of not cold static allocations (possibly cloned) during "
          "ThinLTO backend");
STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
                                    "(possibly cloned) during ThinLTO backend");
STATISTIC(OrigAllocsThinBackend,
          "Number of original (not cloned) allocations with memprof profiles "
          "during ThinLTO backend");
STATISTIC(
    AllocVersionsThinBackend,
    "Number of allocation versions (including clones) during ThinLTO backend");
STATISTIC(MaxAllocVersionsThinBackend,
          "Maximum number of allocation versions created for an original "
          "allocation during ThinLTO backend");
STATISTIC(UnclonableAllocsThinBackend,
          "Number of unclonable ambiguous allocations during ThinLTO backend");
STATISTIC(RemovedEdgesWithMismatchedCallees,
          "Number of edges removed due to mismatched callees (profiled vs IR)");
STATISTIC(FoundProfiledCalleeCount,
          "Number of profiled callees found via tail calls");
STATISTIC(FoundProfiledCalleeDepth,
          "Aggregate depth of profiled callees found via tail calls");
STATISTIC(FoundProfiledCalleeMaxDepth,
          "Maximum depth of profiled callees found via tail calls");
STATISTIC(FoundProfiledCalleeNonUniquelyCount,
          "Number of profiled callees found via multiple tail call chains");
STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning");
STATISTIC(NewMergedNodes, "Number of new nodes created during merging");
STATISTIC(NonNewMergedNodes, "Number of non-new nodes used during merging");
STATISTIC(MissingAllocForContextId,
          "Number of missing alloc nodes for context ids");
STATISTIC(SkippedCallsCloning,
          "Number of calls skipped during cloning due to unexpected operand");
STATISTIC(MismatchedCloneAssignments,
          "Number of callsites assigned to call multiple non-matching clones");
STATISTIC(TotalMergeInvokes, "Number of merge invocations for nodes");
STATISTIC(TotalMergeIters, "Number of merge iterations for nodes");
STATISTIC(MaxMergeIters, "Max merge iterations for nodes");
STATISTIC(NumImportantContextIds, "Number of important context ids");
STATISTIC(NumFixupEdgeIdsInserted, "Number of fixup edge ids inserted");
STATISTIC(NumFixupEdgesAdded, "Number of fixup edges added");
STATISTIC(NumFixedContexts, "Number of contexts with fixed edges");
static cl::opt<std::string> DotFilePathPrefix(
    "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
    cl::desc("Specify the path prefix of the MemProf dot files."));

static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
                                 cl::Hidden,
                                 cl::desc("Export graph to dot files."));
    cl::desc("Iteratively apply merging on a node to catch new callers"));
enum class DotScope {
  All,     // Default, export the entire callsite graph.
  Alloc,   // Export only nodes with contexts feeding the given alloc id.
  Context, // Export only nodes with the given context id.
};

static cl::opt<DotScope> DotGraphScope(
    "memprof-dot-scope", cl::desc("Scope of graph to export to dot"),
    cl::Hidden, cl::init(DotScope::All),
    cl::values(
        clEnumValN(DotScope::All, "all", "Export full callsite graph"),
        clEnumValN(DotScope::Alloc, "alloc",
                   "Export only nodes with contexts feeding given "
                   "-memprof-dot-alloc-id"),
        clEnumValN(DotScope::Context, "context",
                   "Export only nodes with given -memprof-dot-context-id")));

static cl::opt<unsigned> AllocIdForDot(
    "memprof-dot-alloc-id", cl::init(0), cl::Hidden,
    cl::desc("Id of alloc to export if -memprof-dot-scope=alloc "
             "or to highlight if -memprof-dot-scope=all"));

static cl::opt<unsigned> ContextIdForDot(
    "memprof-dot-context-id", cl::init(0), cl::Hidden,
    cl::desc("Id of context to export if -memprof-dot-scope=context or to "
             "highlight otherwise"));
static cl::opt<bool> DumpCCG(
    "memprof-dump-ccg", cl::init(false), cl::Hidden,
    cl::desc("Dump CallingContextGraph to stdout after each stage."));

static cl::opt<bool> VerifyCCG(
    "memprof-verify-ccg", cl::init(false), cl::Hidden,
    cl::desc("Perform verification checks on CallingContextGraph."));

static cl::opt<bool> VerifyNodes(
    "memprof-verify-nodes", cl::init(false), cl::Hidden,
    cl::desc("Perform frequent verification checks on nodes."));
static cl::opt<std::string> MemProfImportSummary(
    "memprof-import-summary",
    cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
    cl::Hidden);

static cl::opt<unsigned>
    TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5),
                        cl::Hidden,
                        cl::desc("Max depth to recursively search for missing "
                                 "frames through tail calls."));
// Optionally enable cloning of callsites involved with recursive cycles.
static cl::opt<bool> AllowRecursiveCallsites(
    "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
    cl::desc("Allow cloning of callsites involved in recursive cycles"));

static cl::opt<bool> CloneRecursiveContexts(
    "memprof-clone-recursive-contexts", cl::init(true), cl::Hidden,
    cl::desc("Allow cloning of contexts through recursive cycles"));

// Generally this is needed for correct assignment of allocation clones to
// function clones.
static cl::opt<bool> MergeClones(
    "memprof-merge-clones", cl::init(true), cl::Hidden,
    cl::desc("Merge clones before assigning functions"));

// When disabled, try to detect and prevent cloning of recursive contexts.
// This is only necessary until we support cloning through recursive cycles.
static cl::opt<bool> AllowRecursiveContexts(
    "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
    cl::desc("Allow cloning of contexts having recursive cycles"));
    cl::desc("Minimum absolute count for promoted target to be inlinable"));
cl::opt<bool> SupportsHotColdNew(
    "supports-hot-cold-new", cl::init(false), cl::Hidden,
    cl::desc("Linking with hot/cold operator new interfaces"));

cl::opt<bool> MemProfRequireDefinitionForPromotion(
    "memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
    cl::desc(
        "Require target function definition when promoting indirect calls"));
    cl::desc("Number of largest cold contexts to consider important"));

    cl::desc("Enables edge fixup for important contexts"));
template <typename DerivedCCG, typename FuncTy, typename CallTy>
class CallsiteContextGraph {
public:
  CallsiteContextGraph() = default;
  CallsiteContextGraph(const CallsiteContextGraph &) = default;
  CallsiteContextGraph(CallsiteContextGraph &&) = default;

  /// Perform cloning on the graph necessary to uniquely identify the
  /// allocation behavior of an allocation based on its context.
  void identifyClones();

  /// Assign callsite clones to functions, cloning functions as needed to
  /// accommodate the combinations of their callsite clones reached by callers.
  bool assignFunctions();

  void dump() const;
  void print(raw_ostream &OS) const;
  void printTotalSizes(raw_ostream &OS) const;

  friend raw_ostream &operator<<(raw_ostream &OS,
                                 const CallsiteContextGraph &CCG) {
    CCG.print(OS);
    return OS;
  }

  friend struct GraphTraits<
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
  friend struct DOTGraphTraits<
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;

  void exportToDot(std::string Label) const;
  /// Represents a function clone via FuncTy pointer and clone number pair.
  struct FuncInfo final : public std::pair<FuncTy *, unsigned /*Clone number*/> {
    using Base = std::pair<FuncTy *, unsigned>;
    FuncInfo(const Base &B) : Base(B) {}
    FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
    explicit operator bool() const { return this->first != nullptr; }
    FuncTy *func() const { return this->first; }
    unsigned cloneNo() const { return this->second; }
  };
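  // Like FuncInfo above, CallInfo pairs a call (CallTy) with a clone number
  // so that cloned callsites can be distinguished from their originals;
  // clone number 0 refers to the original, unrenamed version.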
  /// Represents a callsite clone via CallTy and clone number pair.
  struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> {
    using Base = std::pair<CallTy, unsigned>;
    CallInfo(const Base &B) : Base(B) {}
    CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
        : Base(Call, CloneNo) {}
    explicit operator bool() const { return (bool)this->first; }
    CallTy call() const { return this->first; }
    unsigned cloneNo() const { return this->second; }
    void setCloneNo(unsigned N) { this->second = N; }
    void print(raw_ostream &OS) const {
      if (!operator bool()) {
        assert(!cloneNo());
        OS << "null Call";
        return;
      }
      call()->print(OS);
      OS << "\t(clone " << cloneNo() << ")";
    }
  };
  struct ContextEdge;

  /// Node in the Callsite Context Graph.
  struct ContextNode {
    // True if this node was created for a memprof allocation callsite.
    bool IsAllocation;

    // The corresponding allocation or interior call.
    CallInfo Call;

    // List of other calls that can be treated the same as the primary call
    // through cloning (same function and same (possibly pruned) stack ids).
    std::vector<CallInfo> MatchingCalls;

    // Keeps track of when this callsite was found to be involved in recursion,
    // which prevents its cloning.
    bool Recursive = false;

    // For alloc nodes this is a unique id assigned when constructed, and for
    // callsite stack nodes it is the original stack id when the node is
    // constructed from the memprof MIB metadata on the alloc nodes.
    uint64_t OrigStackOrAllocId = 0;

    // A unique id for the node, primarily for printing and dot export.
    unsigned NodeId = 0;

    // This will be formed by ORing together the AllocationType enum values
    // for contexts including this node.
    uint8_t AllocTypes = 0;

    // Edges to all callees in the profiled call stacks.
    std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;

    // Edges to all callers in the profiled call stacks.
    std::vector<std::shared_ptr<ContextEdge>> CallerEdges;

    // Returns true if we need to also look at the caller edges when
    // determining the node's context ids and allocation type.
    bool useCallerEdgesForContextInfo() const {
      // Typically if the callee edges are empty either the caller edges are
      // also empty, or this is an allocation (leaf node). However, if we are
      // allowing recursive callsites and contexts this will be violated for
      // incompletely cloned recursive cycles.
      assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
             (AllowRecursiveCallsites && AllowRecursiveContexts));
      return IsAllocation || CloneRecursiveContexts;
    }

    // Compute the context ids for this node from the union of its edge
    // context ids.
    DenseSet<uint32_t> getContextIds() const {
      unsigned Count = 0;
      // Compute the number of ids for reserve below.
      for (auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
        Count += Edge->getContextIds().size();
      DenseSet<uint32_t> ContextIds;
      ContextIds.reserve(Count);
      auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
          CalleeEdges, useCallerEdgesForContextInfo()
                           ? CallerEdges
                           : std::vector<std::shared_ptr<ContextEdge>>());
      for (const auto &Edge : Edges)
        ContextIds.insert_range(Edge->getContextIds());
      return ContextIds;
    }

    // Compute the allocation type for this node from the OR of its edge
    // allocation types.
    uint8_t computeAllocType() const {
      uint8_t BothTypes =
          (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
      uint8_t AllocType = (uint8_t)AllocationType::None;
      auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
          CalleeEdges, useCallerEdgesForContextInfo()
                           ? CallerEdges
                           : std::vector<std::shared_ptr<ContextEdge>>());
      for (const auto &Edge : Edges) {
        AllocType |= Edge->AllocTypes;
        // Bail early if alloc type reached both, no further refinement.
        if (AllocType == BothTypes)
          return AllocType;
      }
      return AllocType;
    }

    // The context ids set for this node is empty if its edge context ids are
    // also all empty.
    bool emptyContextIds() const {
      auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
          CalleeEdges, useCallerEdgesForContextInfo()
                           ? CallerEdges
                           : std::vector<std::shared_ptr<ContextEdge>>());
      for (const auto &Edge : Edges) {
        if (!Edge->getContextIds().empty())
          return false;
      }
      return true;
    }

    // List of clones of this ContextNode, initially empty.
    std::vector<ContextNode *> Clones;

    // If a clone, points to the original uncloned node.
    ContextNode *CloneOf = nullptr;

    ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}

    ContextNode(bool IsAllocation, CallInfo C)
        : IsAllocation(IsAllocation), Call(C) {}

    void addClone(ContextNode *Clone) {
      if (CloneOf) {
        CloneOf->Clones.push_back(Clone);
        Clone->CloneOf = CloneOf;
      } else {
        Clones.push_back(Clone);
        assert(!Clone->CloneOf);
        Clone->CloneOf = this;
      }
    }

    ContextNode *getOrigNode() {
      if (!CloneOf)
        return this;
      return CloneOf;
    }

    void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
                               unsigned int ContextId);

    ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
    ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
    void eraseCalleeEdge(const ContextEdge *Edge);
    void eraseCallerEdge(const ContextEdge *Edge);

    void setCall(CallInfo C) { Call = C; }

    bool hasCall() const { return (bool)Call.call(); }

    void printCall(raw_ostream &OS) const { Call.print(OS); }

    // True if this node was effectively removed from the graph, in which case
    // it should have an allocation type of None and empty context ids.
    bool isRemoved() const {
      assert((AllocTypes == (uint8_t)AllocationType::None) ==
             emptyContextIds());
      return AllocTypes == (uint8_t)AllocationType::None;
    }

    void dump() const;
    void print(raw_ostream &OS) const;

    friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
      Node.print(OS);
      return OS;
    }
  };
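  // A ContextEdge connects a callee node to a caller node and records which
  // context ids flow along that calling relationship, along with the OR of
  // their allocation types. Edges are shared (via shared_ptr) between the
  // callee's CallerEdges list and the caller's CalleeEdges list.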
  /// Edge in the Callsite Context Graph from a ContextNode N to a caller or
  /// callee.
  struct ContextEdge {
    ContextNode *Callee;
    ContextNode *Caller;

    // This will be formed by ORing together the AllocationType enum values
    // for contexts including this edge.
    uint8_t AllocTypes = 0;

    // Set just before initiating cloning when cloning of recursive contexts is
    // enabled. Used to defer cloning of backedges until all other edges have
    // been handled.
    bool IsBackedge = false;

    // The set of ids for contexts including this edge.
    DenseSet<uint32_t> ContextIds;

    ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
                DenseSet<uint32_t> ContextIds)
        : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
          ContextIds(std::move(ContextIds)) {}

    DenseSet<uint32_t> &getContextIds() { return ContextIds; }

    // Helper to clear the fields of this edge when we are removing it from the
    // graph.
    inline void clear() {
      ContextIds.clear();
      AllocTypes = (uint8_t)AllocationType::None;
      Caller = nullptr;
      Callee = nullptr;
    }

    // Check if edge was removed from the graph. This is useful while iterating
    // over a copy of edge lists when they may get removed during processing.
    inline bool isRemoved() const {
      if (Callee || Caller)
        return false;
      // Any edges that have been removed from the graph but are still in a
      // shared_ptr somewhere should have all fields null'ed out by clear()
      // above.
      assert(AllocTypes == (uint8_t)AllocationType::None);
      assert(ContextIds.empty());
      return true;
    }

    void dump() const;
    void print(raw_ostream &OS) const;

    friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
      Edge.print(OS);
      return OS;
    }
  };
  /// Helpers to remove edges that have allocation type None and empty context
  /// ids.
  void removeNoneTypeCalleeEdges(ContextNode *Node);
  void removeNoneTypeCallerEdges(ContextNode *Node);
  void
  recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
                                       DenseSet<const ContextNode *> &Visited);

  /// Gets a list of nodes corresponding to the stack ids in the given
  /// callsite context.
  template <class NodeT, class IteratorT>
  std::vector<uint64_t>
  getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);

  /// Adds a node for the given allocation.
  ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);

  /// Adds nodes for the given MIB stack ids.
  template <class NodeT, class IteratorT>
  void addStackNodesForMIB(
      ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
      CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
      ArrayRef<ContextTotalSize> ContextSizeInfo,
      std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);

  /// Matches all callsite metadata (or summary) to the nodes created for
  /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
  /// inlining performed on those callsite instructions.
  void updateStackNodes();

  /// Optionally fixes up the edges along important contexts after matching
  /// in updateStackNodes.
  void fixupImportantContexts();

  /// Update graph to conservatively handle any callsite stack nodes that
  /// target multiple different callee target functions.
  void handleCallsitesWithMultipleTargets();

  /// Mark backedges via the standard DFS based backedge algorithm.
  void markBackedges();

  /// Partitions the calls in AllCalls by callee function, creating new copies
  /// of Node as needed. Returns true if any partitioning was performed.
  bool partitionCallsByCallee(
      ContextNode *Node, ArrayRef<CallInfo> AllCalls,
      std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);

  /// Map from each allocation or callsite call to its corresponding node.
  MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
  MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;

  /// Map from each stack id to its corresponding context node.
  DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;

  /// Map from each context id to the AllocationType assigned to that context.
  DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;

  /// Map from each context id to the profiled full contexts and their total
  /// sizes (there may be more than one due to context trimming).
  DenseMap<uint32_t, std::vector<ContextTotalSize>> ContextIdToContextSizeInfos;

  /// The set of context ids for the allocation of interest when exporting to
  /// dot with -memprof-dot-scope=alloc.
  DenseSet<uint32_t> DotAllocContextIds;

  /// Map from function to the calls with memprof metadata/summary.
  MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;

  /// Map from callsite node to the enclosing caller function.
  std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;

  using EdgeIter =
      typename std::vector<std::shared_ptr<ContextEdge>>::iterator;

  // Structure to keep track of information for each call as we are matching
  // non-allocation callsites onto context nodes created from the allocation
  // call metadata / summary contexts.
  struct CallContextInfo {
    // The callsite we computed stack ids for.
    CallTy Call;
    // The full list of stack ids on this callsite.
    std::vector<uint64_t> StackIds;
    // The function containing this callsite.
    const FuncTy *Func;
    // Initially empty, if needed this will be updated to contain the context
    // ids for use in a new context node created for this callsite.
    DenseSet<uint32_t> SavedContextIds;
  };

  /// Helper to remove edge from graph, updating the edge iterator if it is
  /// provided (in which case CalleeIter indicates which edge list the iterator
  /// is iterating over).
  void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI = nullptr,
                           bool CalleeIter = true);

  /// Assigns the given Node to calls at or inlined into the location with
  /// the Node's stack id, after post order traversing and processing its
  /// caller nodes.
  void assignStackNodesPostOrder(
      ContextNode *Node, DenseSet<const ContextNode *> &Visited,
      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
      DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
      const DenseSet<uint32_t> &ImportantContextIds);

  /// Duplicates the given set of context ids, updating the provided map from
  /// each original id to the newly generated ids, and returning the new set.
  DenseSet<uint32_t>
  duplicateContextIds(const DenseSet<uint32_t> &StackSequenceContextIds,
                      DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

  /// Propagates all duplicated context ids across the graph.
  void propagateDuplicateContextIds(
      const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

  /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
  /// else to its callers. Also updates OrigNode's edges to remove any context
  /// ids moved to the newly created edge.
  void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
                      bool TowardsCallee,
                      DenseSet<uint32_t> RemainingContextIds);

  /// Get the stack id corresponding to the given Id or Index (for IR this
  /// will return itself, for a summary index this will return the id recorded
  /// in the index for that stack id index value).
  uint64_t getStackId(uint64_t IdOrIndex) const {
    return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
  }

  /// Returns true if the given call targets the callee of the given edge, or
  /// if we were able to identify the call chain through intermediate tail
  /// calls (in which case new context nodes are synthesized and recorded in
  /// TailCallToContextNodeMap).
  bool
  calleesMatch(CallTy Call, EdgeIter &EI,
               MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);

  /// Returns the callee function of the given call, or nullptr if it can't be
  /// determined.
  const FuncTy *getCalleeFunc(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getCalleeFunc(Call);
  }

  /// Returns true if the given call targets the given function, or if we were
  /// able to identify the call chain through intermediate tail calls (in
  /// which case FoundCalleeChain will be populated).
  bool calleeMatchesFunc(
      CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,
      std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
    return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
        Call, Func, CallerFunc, FoundCalleeChain);
  }

  /// Returns true if both call instructions have the same callee.
  bool sameCallee(CallTy Call1, CallTy Call2) {
    return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);
  }

  /// Get a list of nodes corresponding to the stack ids in the given
  /// callsite's context.
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
        Call);
  }

  /// Get the last stack id in the context for callsite.
  uint64_t getLastStackId(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
  }

  /// Update the allocation call to record type of allocated memory.
  void updateAllocationCall(CallInfo &Call, AllocationType AllocType) {
    AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
    static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
  }

  /// Get the AllocationType assigned to the given allocation instruction
  /// clone.
  AllocationType getAllocationCallType(const CallInfo &Call) const {
    return static_cast<const DerivedCCG *>(this)->getAllocationCallType(Call);
  }

  /// Update non-allocation call to invoke (possibly cloned) function
  /// CalleeFunc.
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
    static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
  }

  /// Clone the given function for the given callsite, recording mapping of all
  /// of the function's tracked calls to their new versions in the CallMap.
  /// Assigns new clones to clone number CloneNo.
  FuncInfo cloneFunctionForCallsite(
      FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
      std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
    return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
        Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
  }

  /// Gets a label to use in the dot graph for the given call clone in the
  /// given function.
  std::string getLabel(const FuncTy *Func, const CallTy Call,
                       unsigned CloneNo) const {
    return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
  }

  /// Create and return a new ContextNode.
  ContextNode *createNewNode(bool IsAllocation, const FuncTy *F = nullptr,
                             CallInfo C = CallInfo()) {
    NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation, C));
    auto *NewNode = NodeOwner.back().get();
    if (F)
      NodeToCallingFunc[NewNode] = F;
    NewNode->NodeId = NodeOwner.size();
    return NewNode;
  }

  /// Helpers to find the node corresponding to the given call or stackid.
  ContextNode *getNodeForInst(const CallInfo &C);
  ContextNode *getNodeForAlloc(const CallInfo &C);
  ContextNode *getNodeForStackId(uint64_t StackId);
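  // The following helpers implement the low-level graph surgery used during
  // cloning and merging: moving an edge to a new or existing callee clone
  // migrates the requested context ids (and their allocation types) off the
  // original edge, removing the original edge entirely if it becomes empty.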
  /// Create a clone of Edge's callee and move Edge to that new callee node,
  /// performing the necessary context id and allocation type updates. If
  /// ContextIdsToMove is non-empty, only that subset of Edge's ids is moved.
  ContextNode *
  moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                           DenseSet<uint32_t> ContextIdsToMove = {});

  /// Change the callee of Edge to existing callee clone NewCallee, performing
  /// the necessary context id and allocation type updates. If
  /// ContextIdsToMove is non-empty, only that subset of Edge's ids is moved.
  void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                     ContextNode *NewCallee,
                                     bool NewClone = false,
                                     DenseSet<uint32_t> ContextIdsToMove = {});

  /// Change the caller of Edge to NewCaller, performing the necessary context
  /// id and allocation type updates.
  void moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
                                 ContextNode *NewCaller);

  /// Merge the callee clones reached from Node so that each caller invokes a
  /// single consistent set of clones.
  void mergeNodeCalleeClones(
      ContextNode *Node, DenseSet<const ContextNode *> &Visited,
      DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);

  /// Given the CalleeEdges of a node being merged, identify other caller
  /// nodes sharing those callee clones that can share in the merge.
  void findOtherCallersToShareMerge(
      ContextNode *Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
      DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
      DenseSet<ContextNode *> &OtherCallersToShareMerge);
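  // "Important" contexts are the largest cold contexts by total size. Extra
  // bookkeeping is kept for them so that, after stack node matching, their
  // full call chains can be verified and repaired if context ids were dropped
  // along the way (see fixupImportantContexts).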
  /// Information saved for each important context.
  struct ImportantContextInfo {
    // The full list of stack ids for this context.
    std::vector<uint64_t> StackIds;
    // Max length of any stack id sequence matched onto a single node.
    unsigned MaxLength = 0;
    // Map from sequences of stack ids to the node matched to that sequence
    // (a single node may correspond to multiple inlined frames).
    std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
  };

  /// Map from each important context id to its saved information.
  DenseMap<uint32_t, ImportantContextInfo> ImportantContextIdInfo;

  /// Records the given node as matching the given stack id sequence, for any
  /// important context ids it contains.
  void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *Node,
                       const DenseSet<uint32_t> &NodeContextIds,
                       const DenseSet<uint32_t> &ImportantContextIds) {
    auto Ids = set_intersection(NodeContextIds, ImportantContextIds);
    auto Size = StackIds.size();
    for (auto Id : Ids) {
      auto &Entry = ImportantContextIdInfo[Id];
      Entry.StackIdsToNode[StackIds] = Node;
      // Keep track of the max length of stack ids matched onto a node, to
      // bound the search during fixup.
      if (Size > Entry.MaxLength)
        Entry.MaxLength = Size;
    }
  }

  /// Owner of all ContextNode unique_ptrs.
  std::vector<std::unique_ptr<ContextNode>> NodeOwner;

  /// The next context id to assign (we preincrement, so the first assigned
  /// id is 1).
  unsigned int LastContextId = 0;
};
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using ContextNode =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using ContextEdge =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using FuncInfo =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using CallInfo =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
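// Regular LTO implementation: graph nodes reference IR Instructions, stack
// ids come directly from the memprof/callsite metadata, and cloning rewrites
// the IR directly.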
class ModuleCallsiteContextGraph
    : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                                  Instruction *> {
public:
  ModuleCallsiteContextGraph(
      Module &M,
      llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);

private:
  friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                              Instruction *>;

  uint64_t getStackId(uint64_t IdOrIndex) const;
  const Function *getCalleeFunc(Instruction *Call);
  bool calleeMatchesFunc(
      Instruction *Call, const Function *Func, const Function *CallerFunc,
      std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
  bool sameCallee(Instruction *Call1, Instruction *Call2);
  bool findProfiledCalleeThroughTailCalls(
      const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
      std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
      bool &FoundMultipleCalleeChains);
  uint64_t getLastStackId(Instruction *Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
  void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
  AllocationType getAllocationCallType(const CallInfo &Call) const;
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                       Instruction *>::FuncInfo
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           DenseMap<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
                           unsigned CloneNo);
  std::string getLabel(const Function *Func, const Instruction *Call,
                       unsigned CloneNo) const;

  const Module &Mod;
  llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
};
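// For ThinLTO, calls are represented by pointers into the summary index:
// either a CallsiteInfo (interior stack frame) or an AllocInfo (allocation).
// IndexCall wraps the PointerUnion of the two so it can serve as the graph's
// CallTy.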
struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
  IndexCall() : PointerUnion() {}
  IndexCall(std::nullptr_t) : IndexCall() {}
  IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
  IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
  IndexCall(PointerUnion PT) : PointerUnion(PT) {}

  IndexCall *operator->() { return this; }

  void print(raw_ostream &OS) const {
    PointerUnion<CallsiteInfo *, AllocInfo *> Base = *this;
    if (auto *AI = dyn_cast_if_present<AllocInfo *>(Base)) {
      OS << *AI;
    } else {
      auto *CI = dyn_cast_if_present<CallsiteInfo *>(Base);
      assert(CI);
      OS << *CI;
    }
  }
};
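// ThinLTO implementation: graph nodes reference FunctionSummary records,
// stack ids are indices into the index's stack id table, and cloning
// decisions are recorded in the summaries for later application in the
// ThinLTO backends.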
class IndexCallsiteContextGraph
    : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                                  IndexCall> {
public:
  IndexCallsiteContextGraph(
      ModuleSummaryIndex &Index,
      llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          isPrevailing);

  ~IndexCallsiteContextGraph() {
    // Now that we are done with the graph it is safe to add the new
    // CallsiteInfo structs to the function summary vectors. The graph nodes
    // point into locations within these vectors, so we don't want to add them
    // until the graph is finished with its use.
    for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {
      auto *FS = I.first;
      for (auto &Callsite : I.second)
        FS->addCallsite(*Callsite.second);
    }
  }

private:
  friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                              IndexCall>;

  uint64_t getStackId(uint64_t IdOrIndex) const;
  const FunctionSummary *getCalleeFunc(IndexCall &Call);
  bool calleeMatchesFunc(
      IndexCall &Call, const FunctionSummary *Func,
      const FunctionSummary *CallerFunc,
      std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
  bool sameCallee(IndexCall &Call1, IndexCall &Call2);
  bool findProfiledCalleeThroughTailCalls(
      ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
      std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
      bool &FoundMultipleCalleeChains);
  uint64_t getLastStackId(IndexCall &Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
  void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
  AllocationType getAllocationCallType(const CallInfo &Call) const;
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                       IndexCall>::FuncInfo
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           DenseMap<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
                           unsigned CloneNo);
  std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
                       unsigned CloneNo) const;

  // Saves mapping from function summaries containing memprof records back to
  // their VI, for use in checking and debugging.
  std::map<const FunctionSummary *, ValueInfo> FSToVIMap;

  const ModuleSummaryIndex &Index;
  llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
      isPrevailing;

  // Synthesized CallsiteInfo records for frames missing from the profiled
  // contexts due to tail calls. Keyed by the containing function summary and
  // the discovered callee.
  std::unordered_map<FunctionSummary *,
                     std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
      FunctionCalleesToSynthesizedCallsiteInfos;
};

template <>
struct DenseMapInfo<typename CallsiteContextGraph<
    IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
    : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
template <>
struct DenseMapInfo<IndexCall>
    : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
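// Helpers used during function assignment to check whether a callsite clone
// can share an existing function clone: the allocation types seen on the
// callee edges at the callsite must be compatible with those of the
// candidate clone's corresponding edges.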
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool allocTypesMatch(
    const std::vector<uint8_t> &InAllocTypes,
    const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
        &Edges) {
  // This should be called only when the InAllocTypes vector was computed for
  // this set of Edges. Make sure the sizes are the same.
  assert(InAllocTypes.size() == Edges.size());
  return std::equal(
      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
      [](const uint8_t &l,
         const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
        // Can share if one of the edges is None type - don't care about the
        // type along that edge as it doesn't exist for those context ids.
        if (l == (uint8_t)AllocationType::None ||
            r->AllocTypes == (uint8_t)AllocationType::None)
          return true;
        return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
      });
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool allocTypesMatchClone(
    const std::vector<uint8_t> &InAllocTypes,
    const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
  const ContextNode<DerivedCCG, FuncTy, CallTy> *Node = Clone->CloneOf;
  assert(Node);
  // InAllocTypes should have been computed for the original node's callee
  // edges.
  assert(InAllocTypes.size() == Node->CalleeEdges.size());
  // First create a map of the clone's callee edge callees to edge alloc type.
  DenseMap<const ContextNode<DerivedCCG, FuncTy, CallTy> *, uint8_t>
      EdgeCalleeMap;
  for (const auto &E : Clone->CalleeEdges) {
    assert(!EdgeCalleeMap.contains(E->Callee));
    EdgeCalleeMap[E->Callee] = E->AllocTypes;
  }
  // Next, walk the original node's callees, and look for the corresponding
  // clone edge to that callee.
  for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) {
    auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee);
    // Not found is ok, we will simply add an edge if we use this clone.
    if (Iter == EdgeCalleeMap.end())
      continue;
    // Can share if one of the edges is None type - don't care about the type
    // along that edge as it doesn't exist for those context ids.
    if (InAllocTypes[I] == (uint8_t)AllocationType::None ||
        Iter->second == (uint8_t)AllocationType::None)
      continue;
    if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I]))
      return false;
  }
  return true;
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
    const CallInfo &C) {
  ContextNode *Node = getNodeForAlloc(C);
  if (Node)
    return Node;

  return NonAllocationCallToContextNodeMap.lookup(C);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
    const CallInfo &C) {
  return AllocationCallToContextNodeMap.lookup(C);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
    uint64_t StackId) {
  auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
  if (StackEntryNode != StackEntryIdToContextNodeMap.end())
    return StackEntryNode->second;
  return nullptr;
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
                          unsigned int ContextId) {
  // If there is already an edge to this caller, simply update it.
  for (auto &Edge : CallerEdges) {
    if (Edge->Caller == Caller) {
      Edge->AllocTypes |= (uint8_t)AllocType;
      Edge->getContextIds().insert(ContextId);
      return;
    }
  }
  std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
      this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
  CallerEdges.push_back(Edge);
  Caller->CalleeEdges.push_back(Edge);
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
    ContextEdge *Edge, EdgeIter *EI, bool CalleeIter) {
  assert(!EI || (*EI)->get() == Edge);
  // Save the Callee and Caller before clearing the edge below.
  auto *Callee = Edge->Callee;
  auto *Caller = Edge->Caller;
  Edge->clear();

  // Sizes are saved so that after erasing we can assert that the edge was
  // actually removed from each node's list.
  auto CalleeCallerCount = Callee->CallerEdges.size();
  auto CallerCalleeCount = Caller->CalleeEdges.size();
  if (!EI) {
    Callee->eraseCallerEdge(Edge);
    Caller->eraseCalleeEdge(Edge);
  } else if (CalleeIter) {
    Callee->eraseCallerEdge(Edge);
    *EI = Caller->CalleeEdges.erase(*EI);
  } else {
    Caller->eraseCalleeEdge(Edge);
    *EI = Callee->CallerEdges.erase(*EI);
  }
  assert(Callee->CallerEdges.size() < CalleeCallerCount);
  assert(Caller->CalleeEdges.size() < CallerCalleeCount);
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
  for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
    auto Edge = *EI;
    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
      assert(Edge->ContextIds.empty());
      removeEdgeFromGraph(Edge.get(), &EI, /*CalleeIter=*/true);
    } else
      ++EI;
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
  for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
    auto Edge = *EI;
    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
      assert(Edge->ContextIds.empty());
      Edge->Caller->eraseCalleeEdge(Edge.get());
      EI = Node->CallerEdges.erase(EI);
    } else
      ++EI;
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    findEdgeFromCallee(const ContextNode *Callee) {
  for (const auto &Edge : CalleeEdges)
    if (Edge->Callee == Callee)
      return Edge.get();
  return nullptr;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    findEdgeFromCaller(const ContextNode *Caller) {
  for (const auto &Edge : CallerEdges)
    if (Edge->Caller == Caller)
      return Edge.get();
  return nullptr;
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    eraseCalleeEdge(const ContextEdge *Edge) {
  auto EI = llvm::find_if(
      CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
        return CalleeEdge.get() == Edge;
      });
  assert(EI != CalleeEdges.end());
  CalleeEdges.erase(EI);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    eraseCallerEdge(const ContextEdge *Edge) {
  auto EI = llvm::find_if(
      CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
        return CallerEdge.get() == Edge;
      });
  assert(EI != CallerEdges.end());
  CallerEdges.erase(EI);
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
    DenseSet<uint32_t> &ContextIds) const {
  uint8_t BothTypes =
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  uint8_t AllocType = (uint8_t)AllocationType::None;
  for (auto Id : ContextIds) {
    AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
    // Bail early if alloc type reached both, no further refinement.
    if (AllocType == BothTypes)
      return AllocType;
  }
  return AllocType;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
    const DenseSet<uint32_t> &Node1Ids,
    const DenseSet<uint32_t> &Node2Ids) const {
  uint8_t BothTypes =
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  uint8_t AllocType = (uint8_t)AllocationType::None;
  for (auto Id : Node1Ids) {
    if (!Node2Ids.count(Id))
      continue;
    AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
    // Bail early if alloc type reached both, no further refinement.
    if (AllocType == BothTypes)
      return AllocType;
  }
  return AllocType;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
    const DenseSet<uint32_t> &Node1Ids,
    const DenseSet<uint32_t> &Node2Ids) const {
  // Iterate over the smaller set for efficiency.
  if (Node1Ids.size() < Node2Ids.size())
    return intersectAllocTypesImpl(Node1Ids, Node2Ids);
  return intersectAllocTypesImpl(Node2Ids, Node1Ids);
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
    CallInfo Call, const FuncTy *F) {
  assert(!getNodeForAlloc(Call));
  ContextNode *AllocNode = createNewNode(/*IsAllocation=*/true, F, Call);
  AllocationCallToContextNodeMap[Call] = AllocNode;
  // Use LastContextId as a uniq id for MIB allocation nodes.
  AllocNode->OrigStackOrAllocId = LastContextId;
  // Alloc type should be updated as we add in the MIBs. We should assert
  // afterwards that it is not still None.
  AllocNode->AllocTypes = (uint8_t)AllocationType::None;

  return AllocNode;
}
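// Builds the chain of stack nodes for one MIB (one profiled context) of an
// allocation: assigns the context a fresh id, records its size info and
// (optionally) its importance, then walks the stack frames from the
// allocation outward, creating or reusing a node per stack id and adding the
// new context id to each caller edge along the way.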
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
    ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
    CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
    ArrayRef<ContextTotalSize> ContextSizeInfo,
    std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
  // Treating the hot alloc type as NotCold before the disambiguation for "hot"
  // allocations is supported.
  if (AllocType == AllocationType::Hot)
    AllocType = AllocationType::NotCold;

  ContextIdToAllocationType[++LastContextId] = AllocType;

  bool IsImportant = false;
  if (!ContextSizeInfo.empty()) {
    auto &Entry = ContextIdToContextSizeInfos[LastContextId];
    // Compute the total size of this context for use in identifying the
    // largest cold contexts.
    uint64_t TotalCold = 0;
    for (auto &CSI : ContextSizeInfo)
      TotalCold += CSI.TotalSize;
    // Record this context as important if it is a cold context larger than
    // the smallest recorded top-N cold context, evicting that entry.
    if (AllocType == AllocationType::Cold &&
        !TotalSizeToContextIdTopNCold.empty() &&
        TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
      auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
      TotalSizeToContextIdTopNCold.erase(
          TotalSizeToContextIdTopNCold.begin());
      assert(ImportantContextIdInfo.count(IdToRemove));
      ImportantContextIdInfo.erase(IdToRemove);
      TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
      IsImportant = true;
    }
    Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
  }

  // Update alloc type and context ids for this MIB.
  AllocNode->AllocTypes |= (uint8_t)AllocType;

  // Now add or update nodes for each stack id in alloc's context.
  // Later when processing the stack ids on non-alloc callsites we will adjust
  // for any inlining in the context.
  ContextNode *PrevNode = AllocNode;
  // Look for recursion (direct recursion should have been collapsed by the
  // profiler).
  SmallSet<uint64_t, 8> StackIdSet;
  // Skip any on the allocation call (inlining).
  for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
       ContextIter != StackContext.end(); ++ContextIter) {
    auto StackId = getStackId(*ContextIter);
    if (IsImportant)
      ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
    ContextNode *StackNode = getNodeForStackId(StackId);
    if (!StackNode) {
      StackNode = createNewNode(/*IsAllocation=*/false);
      StackEntryIdToContextNodeMap[StackId] = StackNode;
      StackNode->OrigStackOrAllocId = StackId;
    }
    // Marking a node recursive will prevent its cloning completely, even for
    // non-recursive contexts flowing through it.
    if (AllowRecursiveCallsites) {
      auto Ins = StackIdSet.insert(StackId);
      if (!Ins.second)
        StackNode->Recursive = true;
    }
    StackNode->AllocTypes |= (uint8_t)AllocType;
    PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
    PrevNode = StackNode;
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
DenseSet<uint32_t>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
    const DenseSet<uint32_t> &StackSequenceContextIds,
    DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
  DenseSet<uint32_t> NewContextIds;
  for (auto OldId : StackSequenceContextIds) {
    NewContextIds.insert(++LastContextId);
    OldToNewContextIds[OldId].insert(LastContextId);
    assert(ContextIdToAllocationType.count(OldId));
    // The new context has the same allocation type as original.
    ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
    auto CSI = ContextIdToContextSizeInfos.find(OldId);
    if (CSI != ContextIdToContextSizeInfos.end())
      ContextIdToContextSizeInfos[LastContextId] = CSI->second;
    if (DotAllocContextIds.contains(OldId))
      DotAllocContextIds.insert(LastContextId);
  }
  return NewContextIds;
}
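// When multiple calls in different functions share the same stack id
// sequence, their context ids are duplicated (duplicateContextIds above) and
// the new ids must then be added to every edge carrying the originals. The
// following walk does that propagation from each allocation node up through
// its caller edges.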
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    propagateDuplicateContextIds(
        const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
  // Build a set of duplicated context ids corresponding to the input id set.
  auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
    DenseSet<uint32_t> NewIds;
    for (auto Id : ContextIds)
      if (auto NewId = OldToNewContextIds.find(Id);
          NewId != OldToNewContextIds.end())
        NewIds.insert_range(NewId->second);
    return NewIds;
  };

  // Recursively update context ids sets along caller edges.
  auto UpdateCallers = [&](ContextNode *Node,
                           DenseSet<const ContextEdge *> &Visited,
                           auto &&UpdateCallers) -> void {
    for (const auto &Edge : Node->CallerEdges) {
      auto Inserted = Visited.insert(Edge.get());
      if (!Inserted.second)
        continue;
      ContextNode *NextNode = Edge->Caller;
      DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
      // Only need to recursively iterate to NextNode via this caller edge if
      // it resulted in any added ids to NextNode.
      if (!NewIdsToAdd.empty()) {
        Edge->getContextIds().insert_range(NewIdsToAdd);
        UpdateCallers(NextNode, Visited, UpdateCallers);
      }
    }
  };

  DenseSet<const ContextEdge *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    UpdateCallers(Node, Visited, UpdateCallers);
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
    ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
    // This must be passed by value to make a copy since it will be adjusted
    // as ids are moved.
    DenseSet<uint32_t> RemainingContextIds) {
  auto &OrigEdges =
      TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
  DenseSet<uint32_t> RecursiveContextIds;
  DenseSet<uint32_t> AllCallerContextIds;
  if (AllowRecursiveCallsites) {
    // Identify which context ids are recursive, which is needed to properly
    // update the RemainingContextIds set: a recursive id can appear on more
    // than one edge.
    for (auto &CE : OrigEdges) {
      AllCallerContextIds.reserve(CE->getContextIds().size());
      for (auto Id : CE->getContextIds())
        if (!AllCallerContextIds.insert(Id).second)
          RecursiveContextIds.insert(Id);
    }
  }
  // Increment iterator in loop so that we can remove edges as needed.
  for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
    auto Edge = *EI;
    DenseSet<uint32_t> NewEdgeContextIds;
    DenseSet<uint32_t> NotFoundContextIds;
    // Remove any matching context ids from Edge, return set that were found
    // and removed, these are the new edge's context ids. Also update the
    // remaining (not found ids).
    set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
                 NotFoundContextIds);
    if (RecursiveContextIds.empty()) {
      // No recursive ids, so all of the previously remaining context ids that
      // were not seen on this edge are the new remaining set.
      RemainingContextIds.swap(NotFoundContextIds);
    } else {
      // Keep looking for the recursive ids on subsequent edges: only remove
      // the non-recursive ids that were found on this edge from the remaining
      // set.
      DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
          set_difference(NewEdgeContextIds, RecursiveContextIds);
      set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
    }
    // If no matching context ids for this edge, skip it.
    if (NewEdgeContextIds.empty()) {
      ++EI;
      continue;
    }
    if (TowardsCallee) {
      uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
      auto NewEdge = std::make_shared<ContextEdge>(
          Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
      NewNode->CalleeEdges.push_back(NewEdge);
      NewEdge->Callee->CallerEdges.push_back(NewEdge);
    } else {
      uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
      auto NewEdge = std::make_shared<ContextEdge>(
          NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
      NewNode->CallerEdges.push_back(NewEdge);
      NewEdge->Caller->CalleeEdges.push_back(NewEdge);
    }
    // Remove old edge if context ids empty.
    if (Edge->getContextIds().empty()) {
      removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
      continue;
    }
    ++EI;
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkEdge(
    const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
  // Confirm that alloc type is not None and that we have at least one context
  // id.
  assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
  assert(!Edge->ContextIds.empty());
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
                      bool CheckEdges = true) {
  if (Node->isRemoved())
    return;
  // Compute node's context ids once for use in the asserts below.
  auto NodeContextIds = Node->getContextIds();
  // Node's context ids should be the union of both its callee and caller edge
  // context ids.
  if (Node->CallerEdges.size()) {
    DenseSet<uint32_t> CallerEdgeContextIds(
        Node->CallerEdges.front()->ContextIds);
    for (const auto &Edge : llvm::drop_begin(Node->CallerEdges)) {
      if (CheckEdges)
        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CallerEdgeContextIds, Edge->ContextIds);
    }
    // Node can have more context ids than callers if some contexts terminate
    // at node and some are longer.
    assert(NodeContextIds == CallerEdgeContextIds ||
           set_is_subset(CallerEdgeContextIds, NodeContextIds));
  }
  if (Node->CalleeEdges.size()) {
    DenseSet<uint32_t> CalleeEdgeContextIds(
        Node->CalleeEdges.front()->ContextIds);
    for (const auto &Edge : llvm::drop_begin(Node->CalleeEdges)) {
      if (CheckEdges)
        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CalleeEdgeContextIds, Edge->getContextIds());
    }
    assert(NodeContextIds == CalleeEdgeContextIds);
  }
  // Make sure we don't end up with duplicate edges between the same caller
  // and callee.
  DenseSet<ContextNode<DerivedCCG, FuncTy, CallTy> *> NodeSet;
  for (const auto &E : Node->CalleeEdges)
    NodeSet.insert(E->Callee);
  assert(NodeSet.size() == Node->CalleeEdges.size());
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    assignStackNodesPostOrder(ContextNode *Node,
                              DenseSet<const ContextNode *> &Visited,
                              DenseMap<uint64_t, std::vector<CallContextInfo>>
                                  &StackIdToMatchingCalls,
                              DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
                              const DenseSet<uint32_t> &ImportantContextIds) {
  auto Inserted = Visited.insert(Node);
  if (!Inserted.second)
    return;
  // Post order traversal. Iterate over a copy since we may add nodes and
  // therefore new callers during the recursive call, invalidating any
  // iterators.
  auto CallerEdges = Node->CallerEdges;
  for (auto &Edge : CallerEdges) {
    // Skip any that have been removed during the recursion.
    if (Edge->isRemoved()) {
      assert(!is_contained(Node->CallerEdges, Edge));
      continue;
    }
    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
                              CallToMatchingCall, ImportantContextIds);
  }

  // If this node's stack id is in the map, update the graph to contain new
  // nodes representing any inlining at interior callsites. Note we move the
  // associated context ids over to the new nodes.

  // Ignore this node if it is for an allocation or we didn't record any
  // matching calls for its stack id.
  if (Node->IsAllocation ||
      !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
    return;

  auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
  // Handle the simple case first: a single call with a single stack id.
  if (Calls.size() == 1) {
    auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
    if (Ids.size() == 1) {
      assert(SavedContextIds.empty());
      // It should be this Node.
      assert(Node == getNodeForStackId(Ids[0]));
      if (Node->Recursive)
        return;
      Node->setCall(Call);
      NonAllocationCallToContextNodeMap[Call] = Node;
      NodeToCallingFunc[Node] = Func;
      recordStackNode(Ids, Node, Node->getContextIds(), ImportantContextIds);
      return;
    }
  }

#ifndef NDEBUG
  // Find the node for the last stack id, which should be the same
  // across all calls recorded for this id, and is this node's id.
  uint64_t LastId = Node->OrigStackOrAllocId;
  ContextNode *LastNode = getNodeForStackId(LastId);
  // We should only have kept stack ids that had nodes.
  assert(LastNode);
  assert(LastNode == Node);
#else
  ContextNode *LastNode = Node;
#endif

  // Compute the last node's context ids once, as it is shared by all calls in
  // this entry.
  DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();

  [[maybe_unused]] bool PrevIterCreatedNode = false;
  bool CreatedNode = false;
  for (unsigned I = 0; I < Calls.size();
       I++, PrevIterCreatedNode = CreatedNode) {
    CreatedNode = false;
    auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
    // Skip any for which we didn't assign any ids, these don't get a node in
    // the graph.
    if (SavedContextIds.empty()) {
      // If this call has a matching call (located in the same function and
      // having the same stack ids), simply add it to the context node created
      // for its matching call earlier. These can be treated the same through
      // cloning and get updated at the same time.
      if (!CallToMatchingCall.contains(Call))
        continue;
      auto MatchingCall = CallToMatchingCall[Call];
      if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
        // This should only happen if we had a prior iteration, and it didn't
        // create a node because of the below recursion check.
        assert(I > 0 && !PrevIterCreatedNode);
        continue;
      }
      NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
          Call);
      continue;
    }

    assert(LastId == Ids.back());

    // Recompute the context ids for this stack id sequence (the intersection
    // of the context ids of the corresponding nodes). Start with the ids we
    // saved in the map for this call.
    set_intersect(SavedContextIds, LastNodeContextIds);
    ContextNode *PrevNode = LastNode;
    bool Skip = false;
    // Iterate backwards through the stack Ids, starting after the last Id
    // in the list, which was handled once outside for all Calls.
    for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
      auto Id = *IdIter;
      ContextNode *CurNode = getNodeForStackId(Id);
      // We should only have kept stack ids that had nodes and weren't
      // recursive.
      assert(CurNode);
      assert(!CurNode->Recursive);

      auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
      if (!Edge) {
        Skip = true;
        break;
      }
      PrevNode = CurNode;
      set_intersect(SavedContextIds, Edge->getContextIds());

      // If we now have no context ids for clone, skip this call.
      if (SavedContextIds.empty()) {
        Skip = true;
        break;
      }
    }
    if (Skip)
      continue;

    // Create new context node.
    ContextNode *NewNode = createNewNode(/*IsAllocation=*/false, Func, Call);
    NonAllocationCallToContextNodeMap[Call] = NewNode;
    CreatedNode = true;
    NewNode->AllocTypes = computeAllocType(SavedContextIds);

    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
    assert(FirstNode);

    // Connect to callees of innermost stack frame in inlined call chain.
    connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true, SavedContextIds);

    // Connect to callers of outermost stack frame in inlined call chain.
    connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false, SavedContextIds);

    // Now we need to remove context ids from edges/nodes between First and
    // Last Node.
    PrevNode = nullptr;
    for (auto Id : Ids) {
      ContextNode *CurNode = getNodeForStackId(Id);
      // We should only have kept stack ids that had nodes.
      assert(CurNode);

      // Remove the context ids moved to NewNode from CurNode, and the edge
      // from the prior node.
      if (PrevNode) {
        auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
        if (PrevEdge) {
          set_subtract(PrevEdge->getContextIds(), SavedContextIds);
          if (PrevEdge->getContextIds().empty())
            removeEdgeFromGraph(PrevEdge);
        }
      }
      // Since we update the edges from leaf to tail, only look at the callee
      // edges. This isn't an alloc node, so if there are no callee edges, the
      // alloc type is None.
      CurNode->AllocTypes = CurNode->CalleeEdges.empty()
                                ? (uint8_t)AllocationType::None
                                : CurNode->computeAllocType();
      PrevNode = CurNode;
    }

    recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);

    if (VerifyNodes) {
      checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, /*CheckEdges=*/true);
      for (auto Id : Ids) {
        ContextNode *CurNode = getNodeForStackId(Id);
        // We should only have kept stack ids that had nodes.
        assert(CurNode);
        checkNode<DerivedCCG, FuncTy, CallTy>(CurNode, /*CheckEdges=*/true);
      }
    }
  }
}
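// After matching and node synthesis, an important context's id may have been
// dropped from some edges along its recorded stack id chain (e.g. due to the
// intersections taken during matching). The pass below walks each important
// context's chain of matched nodes and re-adds the context id, creating
// edges where they are missing entirely.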
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy,
                          CallTy>::fixupImportantContexts() {
  if (ImportantContextIdInfo.empty())
    return;

  NumImportantContextIds = ImportantContextIdInfo.size();
  if (ExportToDot)
    exportToDot("beforestackfixup");

  // For each important context, walk the recorded stack ids from the
  // allocation outwards, locating the node matched to each (possibly inlined)
  // stack id sequence, and ensure the context id is on the edge between each
  // pair of adjacent nodes.
  for (auto &[CurContextId, Info] : ImportantContextIdInfo) {
    if (Info.StackIdsToNode.empty())
      continue;
    bool Fixed = false;
    ContextNode *PrevNode = nullptr;
    ContextNode *CurNode = nullptr;
    DenseSet<const ContextEdge *> VisitedEdges;
    ArrayRef<uint64_t> AllStackIds(Info.StackIds);
    for (unsigned I = 0; I < AllStackIds.size(); I++, PrevNode = CurNode) {
      CurNode = nullptr;
      auto LenToEnd = AllStackIds.size() - I;
      // Look for the longest stack id sequence starting at this position that
      // was matched to a node, bounded by the max matched length recorded for
      // this context.
      for (uint64_t Len = std::min<uint64_t>(LenToEnd, Info.MaxLength); Len > 0;
           Len--) {
        auto CheckStackIds = AllStackIds.slice(I, Len);
        auto EntryIt = Info.StackIdsToNode.find(CheckStackIds);
        if (EntryIt == Info.StackIdsToNode.end())
          continue;
        CurNode = EntryIt->second;
        // Skip ahead past the frames matched by this node.
        I += Len - 1;
        break;
      }
      if (!CurNode || !PrevNode)
        continue;
      // Ensure there is an edge from PrevNode to CurNode containing this
      // context id.
      auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
      if (CurEdge) {
        // Record the context id on the existing edge if not already there.
        if (CurEdge->getContextIds().insert(CurContextId).second) {
          NumFixupEdgeIdsInserted++;
          Fixed = true;
        }
      } else {
        // Add a new edge for this context.
        NumFixupEdgesAdded++;
        Fixed = true;
        DenseSet<uint32_t> ContextIds({CurContextId});
        auto AllocType = computeAllocType(ContextIds);
        auto NewEdge = std::make_shared<ContextEdge>(
            PrevNode, CurNode, AllocType, std::move(ContextIds));
        PrevNode->CallerEdges.push_back(NewEdge);
        CurNode->CalleeEdges.push_back(NewEdge);
        CurEdge = NewEdge.get();
      }
      VisitedEdges.insert(CurEdge);
      // Remove this context id from any other (unvisited) caller edges of
      // PrevNode, since it should only flow along the fixed-up chain.
      for (auto &Edge : PrevNode->CallerEdges) {
        if (Edge.get() == CurEdge || VisitedEdges.contains(Edge.get()))
          continue;
        Edge->getContextIds().erase(CurContextId);
      }
    }
    if (Fixed)
      NumFixedContexts++;
  }
}
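// Matches callsite metadata/summaries onto the stack nodes built from the
// allocation MIB contexts. This must handle inlining: a call whose callsite
// context contains several stack ids is matched to the sequence of nodes for
// those ids, which are then collapsed into a single new node for the call.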
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
  // Map of stack id to all calls with that as the last (outermost caller)
  // callsite id that has a context node (some might not due to pruning
  // performed during matching of the allocation profile contexts).
  DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
    for (auto &Call : CallsWithMetadata) {
      // Ignore allocations, already handled.
      if (AllocationCallToContextNodeMap.count(Call))
        continue;
      auto StackIdsWithContextNodes =
          getStackIdsWithContextNodesForCall(Call.call());
      // If there were no nodes created for MIBs on allocs (maybe this was in
      // the unambiguous part of the MIB stack that was pruned), ignore.
      if (StackIdsWithContextNodes.empty())
        continue;
      // Otherwise, record this Call along with the list of ids for the last
      // (outermost caller) stack id with a node.
      StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
          {Call.call(), StackIdsWithContextNodes, Func, {}});
    }
  }

  // Where we need to duplicate context ids (see below), we save the mapping
  // from the original to new ids and propagate it once at the end.
  DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;

  // Save a map from each call to any that are found to match it, for use when
  // later updating the graph.
  DenseMap<CallInfo, CallInfo> CallToMatchingCall;
  for (auto &It : StackIdToMatchingCalls) {
    auto &Calls = It.getSecond();
    // Skip single calls with a single stack id. These don't need a new node.
    if (Calls.size() == 1) {
      auto &Ids = Calls[0].StackIds;
      if (Ids.size() == 1)
        continue;
    }
    // Sort the vector of stack id lists in decreasing length, and within a
    // length lexicographically, so that calls with identical stack id
    // sequences end up adjacent. Calls with the same ids are then sorted by
    // function (via a stable index) so they can share a context node.
    DenseMap<const FuncTy *, unsigned> FuncToIndex;
    for (const auto &[Idx, CallCtxInfo] : enumerate(Calls))
      FuncToIndex.insert({CallCtxInfo.Func, Idx});
    std::stable_sort(
        Calls.begin(), Calls.end(),
        [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) {
          return A.StackIds.size() > B.StackIds.size() ||
                 (A.StackIds.size() == B.StackIds.size() &&
                  (A.StackIds < B.StackIds ||
                   (A.StackIds == B.StackIds &&
                    FuncToIndex[A.Func] < FuncToIndex[B.Func])));
        });

    // Find the node for the last stack id, which should be the same across
    // all calls recorded for this id, and is the map key.
    uint64_t LastId = It.getFirst();
    ContextNode *LastNode = getNodeForStackId(LastId);
    // We should only have kept stack ids that had nodes.
    assert(LastNode);

    if (LastNode->Recursive)
      continue;

    // Initialize the context ids with the last node's, which will be refined
    // by intersecting along all edges in each sequence.
    DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();

    // Track the functions seen for calls with the current set of stack ids,
    // to detect multiple calls in a single function needing id duplication.
    DenseSet<const FuncTy *> MatchingIdsFuncSet;

    for (unsigned I = 0; I < Calls.size(); I++) {
      auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
      assert(SavedContextIds.empty());
      assert(LastId == Ids.back());

      // If this call has a different set of ids than the last one, clear the
      // set used to detect calls sharing the same ids in one function.
      if (I > 0 && Ids != Calls[I - 1].StackIds)
        MatchingIdsFuncSet.clear();

      // First compute the context ids for this stack id sequence (the
      // intersection of the context ids of the corresponding nodes). Start
      // with the remaining saved ids for the last node.
      DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;

      ContextNode *PrevNode = LastNode;
      ContextNode *CurNode = LastNode;
      bool Skip = false;

      // Iterate backwards through the stack Ids, starting after the last Id
      // in the list, which was handled once above for all Calls.
      for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
        auto Id = *IdIter;
        CurNode = getNodeForStackId(Id);
        // We should only have kept stack ids that had nodes.
        assert(CurNode);

        if (CurNode->Recursive) {
          Skip = true;
          break;
        }

        auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
        // If there is no edge then the nodes belong to different MIB contexts,
        // and we should skip this inlined context sequence.
        if (!Edge) {
          Skip = true;
          break;
        }
        PrevNode = CurNode;

        // Update the context ids, which is the intersection of the ids along
        // all edges in the sequence.
        set_intersect(StackSequenceContextIds, Edge->getContextIds());

        // If we now have no context ids for clone, skip this call.
        if (StackSequenceContextIds.empty()) {
          Skip = true;
          break;
        }
      }
      if (Skip)
        continue;

      // If some of this call's stack ids did not have corresponding nodes
      // (due to pruning), don't include any context ids for contexts that
      // extend beyond these nodes. To do this, subtract any context ids found
      // in caller nodes of the last node found above.
      if (Ids.back() != getLastStackId(Call)) {
        for (const auto &PE : LastNode->CallerEdges) {
          set_subtract(StackSequenceContextIds, PE->getContextIds());
          if (StackSequenceContextIds.empty())
            break;
        }
        // If we now have no context ids for clone, skip this call.
        if (StackSequenceContextIds.empty())
          continue;
      }

      MatchingIdsFuncSet.insert(Func);

      // Check if the next set of stack ids is the same (since the Calls
      // vector is sorted by stack ids we can just look at the next ones). If
      // so, save them in the CallToMatchingCall map so they get assigned to
      // the same context node, and skip them. Calls in a different function
      // with the same ids instead need duplicated context ids.
      bool DuplicateContextIds = false;
      for (unsigned J = I + 1; J < Calls.size(); J++) {
        auto &CallCtxInfo = Calls[J];
        auto &NextIds = CallCtxInfo.StackIds;
        if (NextIds != Ids)
          break;
        auto *NextFunc = CallCtxInfo.Func;
        if (NextFunc != Func) {
          // We have another Call with the same ids but that cannot share this
          // node, must duplicate ids for it.
          DuplicateContextIds = true;
          break;
        }
        auto &NextCall = CallCtxInfo.Call;
        CallToMatchingCall[NextCall] = Call;
        // Update I so that it gets incremented correctly to skip this call.
        I = J;
      }

      // If we don't have duplicate context ids, then we can assign all the
      // context ids computed for the original node sequence to this call.
      // Otherwise, synthesize duplicates of the originals and assign those.
      OldToNewContextIds.reserve(OldToNewContextIds.size() +
                                 StackSequenceContextIds.size());
      SavedContextIds =
          DuplicateContextIds
              ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
              : StackSequenceContextIds;
      assert(!SavedContextIds.empty());

      if (!DuplicateContextIds) {
        // Update the remaining context ids for the last node. The ids for the
        // calls with duplicated ids are assigned afterwards via
        // propagateDuplicateContextIds, so don't subtract those here.
        set_subtract(LastNodeContextIds, StackSequenceContextIds);
        if (LastNodeContextIds.empty())
          break;
      }
    }
  }

  // Propagate the duplicate context ids over the graph.
  propagateDuplicateContextIds(OldToNewContextIds);

  if (VerifyCCG)
    check();

  // Now perform a post-order traversal over the graph, starting with the
  // allocation nodes, essentially processing nodes from callers to callees.
  // For any that contains an id in the map, update the graph to contain new
  // nodes representing any inlining at interior callsites.
  DenseSet<const ContextNode *> Visited;
  DenseSet<uint32_t> ImportantContextIds(llvm::from_range,
                                         ImportantContextIdInfo.keys());
  for (auto &Entry : AllocationCallToContextNodeMap)
    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
                              CallToMatchingCall, ImportantContextIds);

  fixupImportantContexts();

  if (VerifyCCG)
    check();
}
uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
  CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
      Call->getMetadata(LLVMContext::MD_callsite));
  return CallsiteContext.back();
}

uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
  assert(isa<CallsiteInfo *>(Call));
  CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
      CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
  // Need to convert index into stack id.
  return Index.getStackIdAtIndex(CallsiteContext.back());
}

static const std::string MemProfCloneSuffix = ".memprof.";

static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
  // We use CloneNo == 0 to refer to the original version, which doesn't get
  // renamed with a suffix.
  if (!CloneNo)
    return Base.str();
  return (Base + MemProfCloneSuffix + Twine(CloneNo)).str();
}

static bool isMemProfClone(const Function &F) {
  return F.getName().contains(MemProfCloneSuffix);
}

// Return the clone number of the given function by extracting it from the
// memprof suffix. Assumes the caller has already confirmed it is a memprof
// clone.
static unsigned getMemProfCloneNum(const Function &F) {
  assert(isMemProfClone(F));
  auto Pos = F.getName().find_last_of('.');
  assert(Pos > 0);
  unsigned CloneNo;
  bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
  assert(!Err);
  (void)Err;
  return CloneNo;
}

std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
                                                 const Instruction *Call,
                                                 unsigned CloneNo) const {
  return (Twine(Call->getFunction()->getName()) + " -> " +
          Twine(Call->getOpcodeName()))
      .str();
}

std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
                                                const IndexCall &Call,
                                                unsigned CloneNo) const {
  auto VI = FSToVIMap.find(Func);
  assert(VI != FSToVIMap.end());
  std::string CallerName = getMemProfFuncName(VI->second.name(), CloneNo);
  if (isa<AllocInfo *>(Call))
    return CallerName + " -> alloc";
  auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call);
  return CallerName + " -> " +
         getMemProfFuncName(Callsite->Callee.name(),
                            Callsite->Clones[CloneNo]);
}
std::vector<uint64_t>
ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
    Instruction *Call) {
  CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
      Call->getMetadata(LLVMContext::MD_callsite));
  return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
      CallsiteContext);
}

std::vector<uint64_t>
IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
  assert(isa<CallsiteInfo *>(Call));
  CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
      CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
  return getStackIdsWithContextNodes<CallsiteInfo,
                                     SmallVector<unsigned>::const_iterator>(
      CallsiteContext);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
std::vector<uint64_t>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
    CallStack<NodeT, IteratorT> &CallsiteContext) {
  std::vector<uint64_t> StackIds;
  for (auto IdOrIndex : CallsiteContext) {
    auto StackId = getStackId(IdOrIndex);
    ContextNode *Node = getNodeForStackId(StackId);
    if (!Node)
      break;
    StackIds.push_back(StackId);
  }
  return StackIds;
}
ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
    Module &M,
    llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
    : Mod(M), OREGetter(OREGetter) {
  // Map used to keep track of the top N largest cold contexts, so they can be
  // marked as important.
  std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
  for (auto &F : M) {
    std::vector<CallInfo> CallsWithMetadata;
    for (auto &BB : F) {
      for (auto &I : BB) {
        if (!isa<CallBase>(I))
          continue;
        if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
          CallsWithMetadata.push_back(&I);
          auto *AllocNode = addAllocNode(&I, &F);
          auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
          assert(CallsiteMD);
          CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD);
          // Add all of the MIBs and their stack nodes.
          for (auto &MDOp : MemProfMD->operands()) {
            auto *MIBMD = cast<const MDNode>(MDOp);
            std::vector<ContextTotalSize> ContextSizeInfo;
            // Collect the context size information if it exists.
            if (MIBMD->getNumOperands() > 2) {
              for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
                MDNode *ContextSizePair =
                    dyn_cast<MDNode>(MIBMD->getOperand(I));
                assert(ContextSizePair->getNumOperands() == 2);
                uint64_t FullStackId =
                    mdconst::dyn_extract<ConstantInt>(
                        ContextSizePair->getOperand(0))
                        ->getZExtValue();
                uint64_t TotalSize =
                    mdconst::dyn_extract<ConstantInt>(
                        ContextSizePair->getOperand(1))
                        ->getZExtValue();
                ContextSizeInfo.push_back({FullStackId, TotalSize});
              }
            }
            MDNode *StackNode = getMIBStackNode(MIBMD);
            assert(StackNode);
            CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
            addStackNodesForMIB<MDNode, MDNode::op_iterator>(
                AllocNode, StackContext, CallsiteContext,
                getMIBAllocType(MIBMD), ContextSizeInfo,
                TotalSizeToContextIdTopNCold);
          }
          // If exporting the graph to dot for a specific allocation, record
          // all of the context ids for this allocation node.
          if (ExportToDot && AllocNode->OrigStackOrAllocId == AllocIdForDot)
            DotAllocContextIds = AllocNode->getContextIds();
          assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
          // Memprof and callsite metadata on memory allocations no longer
          // needed.
          I.setMetadata(LLVMContext::MD_memprof, nullptr);
          I.setMetadata(LLVMContext::MD_callsite, nullptr);
        }
        // For callsite metadata, add to list for this function for later use.
        else if (I.getMetadata(LLVMContext::MD_callsite)) {
          CallsWithMetadata.push_back(&I);
        }
      }
    }
    if (!CallsWithMetadata.empty())
      FuncToCallsWithMetadata[&F] = CallsWithMetadata;
  }

  if (DumpCCG) {
    dbgs() << "CCG before updating call stack chains:\n";
    dbgs() << *this;
  }

  if (ExportToDot)
    exportToDot("prestackupdate");

  updateStackNodes();

  if (ExportToDot)
    exportToDot("poststackupdate");

  handleCallsitesWithMultipleTargets();

  // Strip off remaining callsite metadata, no longer needed.
  for (auto &FuncEntry : FuncToCallsWithMetadata)
    for (auto &Call : FuncEntry.second)
      Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
}
IndexCallsiteContextGraph::IndexCallsiteContextGraph(
    ModuleSummaryIndex &Index,
    llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing)
    : Index(Index), isPrevailing(isPrevailing) {
  // Map used to keep track of the top N largest cold contexts, so they can be
  // marked as important.
  std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
  for (auto &I : Index) {
    auto VI = Index.getValueInfo(I);
    for (auto &S : VI.getSummaryList()) {
      // We should only add the prevailing nodes. Otherwise we may try to
      // clone in a summary copy prevailing in a different module (which will
      // clash in the final binary).
      if (!isPrevailing(VI.getGUID(), S.get()))
        continue;
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      std::vector<CallInfo> CallsWithMetadata;
      if (!FS->allocs().empty()) {
        for (auto &AN : FS->mutableAllocs()) {
          // This can happen because of recursion elimination handling that
          // currently exists in ModuleSummaryAnalysis. Skip these for now.
          if (AN.MIBs.empty())
            continue;
          IndexCall AllocCall(&AN);
          CallsWithMetadata.push_back(AllocCall);
          auto *AllocNode = addAllocNode(AllocCall, FS);

          // Pass an empty CallStack to the CallsiteContext (second) parameter,
          // since for ThinLTO we already collapsed out the inlined stack ids
          // on the allocation call during ModuleSummaryAnalysis.
          CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
              EmptyContext;
          unsigned I = 0;
          assert(AN.ContextSizeInfos.empty() ||
                 AN.ContextSizeInfos.size() == AN.MIBs.size());
          // Now add all of the MIBs and their stack nodes.
          for (auto &MIB : AN.MIBs) {
            CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
                StackContext(&MIB);
            std::vector<ContextTotalSize> ContextSizeInfo;
            if (!AN.ContextSizeInfos.empty()) {
              for (auto [FullStackId, TotalSize] : AN.ContextSizeInfos[I])
                ContextSizeInfo.push_back({FullStackId, TotalSize});
            }
            addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
                AllocNode, StackContext, EmptyContext, MIB.AllocType,
                ContextSizeInfo, TotalSizeToContextIdTopNCold);
            I++;
          }
          // If exporting the graph to dot for a specific allocation, record
          // all of the context ids for this allocation node.
          if (ExportToDot && AllocNode->OrigStackOrAllocId == AllocIdForDot)
            DotAllocContextIds = AllocNode->getContextIds();
          assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
          // Initialize version 0 on the summary alloc node to the current
          // alloc type, unless it has both types in which case make it
          // default, so that in the case where we aren't able to clone the
          // original version always ends up with the default allocation
          // behavior.
          AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
        }
      }
      // For callsite metadata, add to list for this function for later use.
      if (!FS->callsites().empty())
        for (auto &SN : FS->mutableCallsites()) {
          IndexCall StackNodeCall(&SN);
          CallsWithMetadata.push_back(StackNodeCall);
        }

      if (!CallsWithMetadata.empty())
        FuncToCallsWithMetadata[FS] = CallsWithMetadata;

      if (!FS->allocs().empty() || !FS->callsites().empty())
        FSToVIMap[FS] = VI;
    }
  }

  if (DumpCCG) {
    dbgs() << "CCG before updating call stack chains:\n";
    dbgs() << *this;
  }

  if (ExportToDot)
    exportToDot("prestackupdate");

  updateStackNodes();

  if (ExportToDot)
    exportToDot("poststackupdate");

  handleCallsitesWithMultipleTargets();
}
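// A single profiled callsite (same debug location) can end up calling
// multiple distinct targets, e.g. via indirect calls or because multiple
// calls share line/column info (macro expansion, destructors of temporaries).
// The pass below conservatively partitions or prunes such nodes so that each
// remaining node has a single consistent callee.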
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy,
                          CallTy>::handleCallsitesWithMultipleTargets() {
  // Keep track of the new nodes synthesized for discovered tail calls missing
  // from the profiled contexts.
  MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap;

  std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
  for (auto &Entry : NonAllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    assert(Node->Clones.empty());
    // Check all node callees and see if in the same function.
    // We need to check all of the calls recorded in this Node, because in
    // some cases we may have had multiple calls with the same debug info
    // calling different callees.
    std::vector<CallInfo> AllCalls;
    AllCalls.reserve(Node->MatchingCalls.size() + 1);
    AllCalls.push_back(Node->Call);
    AllCalls.insert(AllCalls.end(), Node->MatchingCalls.begin(),
                    Node->MatchingCalls.end());

    // First try to partition the calls by callee function, creating new nodes
    // as needed.
    if (partitionCallsByCallee(Node, AllCalls, NewCallToNode))
      continue;

    auto It = AllCalls.begin();
    // Iterate through the calls until we find the first that matches.
    for (; It != AllCalls.end(); ++It) {
      auto ThisCall = *It;
      bool Match = true;
      for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
           /*increment in calleesMatch*/) {
        auto Edge = *EI;
        if (!Edge->Callee->hasCall()) {
          ++EI;
          continue;
        }
        assert(NodeToCallingFunc.count(Edge->Callee));
        // Check if the called function matches that of the callee node.
        if (!calleesMatch(ThisCall.call(), EI, TailCallToContextNodeMap)) {
          Match = false;
          break;
        }
      }
      // Found a call that matches the callee nodes, we can quit now.
      if (Match) {
        // If the first match is not the primary call on the Node, update it.
        if (Node->Call != ThisCall) {
          Node->setCall(ThisCall);
          // We need to update the NonAllocationCallToContextNodeMap, but
          // don't want to do this during iteration over that map, so save the
          // calls that need updated entries.
          NewCallToNode.push_back({ThisCall, Node});
        }
        break;
      }
    }
    // We will update this list below (or leave it cleared if there was no
    // match found above).
    Node->MatchingCalls.clear();
    // If we hit the end of the AllCalls vector, no call matching the callee
    // nodes was found, clear the call information in the node.
    if (It == AllCalls.end()) {
      RemovedEdgesWithMismatchedCallees++;
      // Work around by setting Node to have a null call, so it gets skipped
      // during cloning. Otherwise assignFunctions will assert because its
      // data structures are not designed to handle this case.
      Node->setCall(CallInfo());
      continue;
    }
    // Now add back any matching calls that call the same function as the
    // matching primary call on Node.
    for (++It; It != AllCalls.end(); ++It) {
      auto ThisCall = *It;
      if (!sameCallee(Node->Call.call(), ThisCall.call()))
        continue;
      Node->MatchingCalls.push_back(ThisCall);
    }
  }

  // Remove all mismatched nodes identified in the above loop from the node
  // map (checking whether they have a null call which is set above). For a
  // MapVector like NonAllocationCallToContextNodeMap it is much more
  // efficient to do this via remove_if than by individually erasing entries
  // and invalidating iterators.
  NonAllocationCallToContextNodeMap.remove_if([](const auto &it) {
    return !it.second->hasCall() || it.second->Call != it.first;
  });

  // Add entries for any new primary calls recorded above.
  for (auto &[Call, Node] : NewCallToNode)
    NonAllocationCallToContextNodeMap[Call] = Node;

  // Add the new nodes after the above loop so that the iteration is not
  // invalidated.
  for (auto &[Call, Node] : TailCallToContextNodeMap)
    NonAllocationCallToContextNodeMap[Call] = Node;
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
    ContextNode *Node, ArrayRef<CallInfo> AllCalls,
    std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
  // Struct to keep track of all the calls having the same callee function,
  // and the node we eventually assign to them.
  struct CallsWithSameCallee {
    std::vector<CallInfo> Calls;
    ContextNode *Node = nullptr;
  };

  // First partition calls by callee function. Build map from each function
  // to the list of matching calls.
  DenseMap<const FuncTy *, CallsWithSameCallee> CalleeFuncToCallInfo;
  for (auto ThisCall : AllCalls) {
    auto *F = getCalleeFunc(ThisCall.call());
    if (F)
      CalleeFuncToCallInfo[F].Calls.push_back(ThisCall);
  }

  // Next, walk through all callee edges. For each callee node, get its
  // containing function and see if it was recorded in the above map (meaning
  // we have at least one matching call). Build another map from each callee
  // node with a matching call to the structure instance created above
  // containing all calls with the same callee function.
  DenseMap<ContextNode *, CallsWithSameCallee *> CalleeNodeToCallInfo;
  for (const auto &Edge : Node->CalleeEdges) {
    if (!Edge->Callee->hasCall())
      continue;
    const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
    if (CalleeFuncToCallInfo.contains(ProfiledCalleeFunc))
      CalleeNodeToCallInfo[Edge->Callee] =
          &CalleeFuncToCallInfo[ProfiledCalleeFunc];
  }

  // If there are no entries in the second map, then there were no matching
  // calls/callees, nothing to do here. Return so we can go to the handling
  // that looks through tail calls.
  if (CalleeNodeToCallInfo.empty())
    return false;

  // Walk through all callee edges again. Any callee edges that didn't match
  // any calls are moved to a new caller node (UnmatchedCalleesNode), which
  // gets a null call so that it is ignored during cloning. If it is in the
  // map, then we use the node recorded in that entry (creating it if needed),
  // and move the callee edge to it. The first callee will use the original
  // node instead of creating a new one.
  ContextNode *UnmatchedCalleesNode = nullptr;
  // Track whether we already assigned the original node to a callee.
  bool UsedOrigNode = false;
  assert(NodeToCallingFunc[Node]);
  // Iterate over a copy of Node's callee edges, since we may need to remove
  // edges in moveCalleeEdgeToNewCaller, and this simplifies the handling and
  // makes it less error-prone.
  auto CalleeEdges = Node->CalleeEdges;
  for (auto &Edge : CalleeEdges) {
    if (!Edge->Callee->hasCall())
      continue;

    // Will be updated below to point to whatever (caller) node this callee
    // edge should be moved to.
    ContextNode *CallerNodeToUse = nullptr;

    // Handle the case where there were no matching calls first. Move this
    // callee edge to the UnmatchedCalleesNode, creating it if needed.
    if (!CalleeNodeToCallInfo.contains(Edge->Callee)) {
      if (!UnmatchedCalleesNode)
        UnmatchedCalleesNode =
            createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
      CallerNodeToUse = UnmatchedCalleesNode;
    } else {
      // Look up the information recorded for this callee node, and use the
      // recorded caller node (creating it if needed).
      auto *Info = CalleeNodeToCallInfo[Edge->Callee];
      if (!Info->Node) {
        // If we haven't assigned any callees to the original node use it.
        if (!UsedOrigNode) {
          Info->Node = Node;
          // Clear the set of matching calls which will be updated below.
          Node->MatchingCalls.clear();
          UsedOrigNode = true;
        } else
          Info->Node =
              createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
        assert(!Info->Calls.empty());
        // The first call becomes the primary call for this caller node, and
        // the rest go in the matching calls list.
        Info->Node->setCall(Info->Calls.front());
        Info->Node->MatchingCalls.insert(Info->Node->MatchingCalls.end(),
                                         Info->Calls.begin() + 1,
                                         Info->Calls.end());
        // Save the primary call to node correspondence so that we can update
        // the NonAllocationCallToContextNodeMap, which is being iterated in
        // the caller of this function.
        NewCallToNode.push_back({Info->Node->Call, Info->Node});
      }
      CallerNodeToUse = Info->Node;
    }

    // Don't need to move edge if we are using the original node.
    if (CallerNodeToUse == Node)
      continue;

    moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
  }
  // Now that we are done moving edges, clean up any caller edges that ended
  // up with no type or context ids. During moveCalleeEdgeToNewCaller all
  // caller edges from Node are replicated onto the new callers, and it
  // simplifies the handling to leave them until we have moved all
  // edges/ids.
  for (auto &I : CalleeNodeToCallInfo)
    removeNoneTypeCallerEdges(I.second->Node);
  if (UnmatchedCalleesNode)
    removeNoneTypeCallerEdges(UnmatchedCalleesNode);
  removeNoneTypeCallerEdges(Node);

  return true;
}
uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
  // In the Index case this is an index into the stack id list in the summary
  // index, convert it to an Id.
  return Index.getStackIdAtIndex(IdOrIndex);
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
    CallTy Call, EdgeIter &EI,
    MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
  auto Edge = *EI;
  const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
  const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
  // Will be populated in order of callee to caller if we find a chain of tail
  // calls between the profiled caller and callee.
  std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
  if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
                         FoundCalleeChain)) {
    ++EI;
    return false;
  }

  // The usual case where the profiled callee matches that of the IR/summary.
  if (FoundCalleeChain.empty()) {
    ++EI;
    return true;
  }

  auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) {
    auto *CurEdge = Callee->findEdgeFromCaller(Caller);
    // If there is already an edge between these nodes, simply update it and
    // return.
    if (CurEdge) {
      CurEdge->ContextIds.insert_range(Edge->ContextIds);
      CurEdge->AllocTypes |= Edge->AllocTypes;
      return;
    }
    // Otherwise, create a new edge and insert it into the caller and callee
    // lists.
    auto NewEdge = std::make_shared<ContextEdge>(
        Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
    Callee->CallerEdges.push_back(NewEdge);
    if (Caller == Edge->Caller) {
      // If we are inserting the new edge into the current edge iteration's
      // caller, insert the new edge before the current iterator position,
      // and then increment back to the current edge.
      EI = Caller->CalleeEdges.insert(EI, NewEdge);
      ++EI;
      assert(*EI == Edge &&
             "Iterator position not restored after insert and increment");
    } else
      Caller->CalleeEdges.push_back(NewEdge);
  };

  // Create new nodes for each found callee and connect in between the
  // profiled caller and callee.
  auto *CurCalleeNode = Edge->Callee;
  for (auto &[NewCall, Func] : FoundCalleeChain) {
    ContextNode *NewNode = nullptr;
    // First check if we have already created a node for this tail call.
    if (TailCallToContextNodeMap.count(NewCall)) {
      NewNode = TailCallToContextNodeMap[NewCall];
      NewNode->AllocTypes |= Edge->AllocTypes;
    } else {
      FuncToCallsWithMetadata[Func].push_back({NewCall});
      // Create Node and record node info.
      NewNode = createNewNode(/*IsAllocation=*/false, Func, NewCall);
      TailCallToContextNodeMap[NewCall] = NewNode;
      NewNode->AllocTypes = Edge->AllocTypes;
    }

    // Hook up node to its callee node.
    AddEdge(NewNode, CurCalleeNode);

    CurCalleeNode = NewNode;
  }

  // Hook up edge's original caller to new callee node.
  AddEdge(Edge->Caller, CurCalleeNode);

  // Remove old edge.
  removeEdgeFromGraph(Edge.get(), &EI, /*CalleeIter=*/true);

  return true;
}
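// Profiled contexts can be missing frames for functions that made tail
// calls, since those frames are no longer on the stack when the allocation
// context is recorded. The search below tries to reconstruct a unique chain
// of tail calls connecting the profiled caller to the callee actually found
// in the IR; if multiple chains are possible the match is rejected as
// ambiguous.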
bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
    const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
    std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
    bool &FoundMultipleCalleeChains) {
  // Stop recursive search if we have already explored the maximum specified
  // depth.
  if (Depth > TailCallSearchDepth)
    return false;

  auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) {
    FoundCalleeChain.push_back({Callsite, F});
  };

  auto *CalleeFunc = dyn_cast<Function>(CurCallee);
  if (!CalleeFunc) {
    auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
    assert(Alias);
    CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
    assert(CalleeFunc);
  }

  // Look for tail calls in this function, and check if they either call the
  // profiled callee directly, or indirectly (via a recursive search).
  // Only succeed if there is a single unique tail call chain found between
  // the profiled caller and callee, otherwise we could perform incorrect
  // cloning.
  bool FoundSingleCalleeChain = false;
  for (auto &BB : *CalleeFunc) {
    for (auto &I : BB) {
      auto *CB = dyn_cast<CallBase>(&I);
      if (!CB || !CB->isTailCall())
        continue;
      auto *CalledValue = CB->getCalledOperand();
      auto *CalledFunction = CB->getCalledFunction();
      if (CalledValue && !CalledFunction) {
        CalledValue = CalledValue->stripPointerCasts();
        // Stripping pointer casts can reveal a called function.
        CalledFunction = dyn_cast<Function>(CalledValue);
      }
      // Check if this is an alias to a function. If so, get the called
      // aliasee for the checks below.
      if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
        assert(!CalledFunction &&
               "Expected null called function in callsite for alias");
        CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
      }
      if (!CalledFunction)
        continue;
      if (CalledFunction == ProfiledCallee) {
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
          return false;
        }
        FoundSingleCalleeChain = true;
        FoundProfiledCalleeCount++;
        FoundProfiledCalleeDepth += Depth;
        if (Depth > FoundProfiledCalleeMaxDepth)
          FoundProfiledCalleeMaxDepth = Depth;
        SaveCallsiteInfo(&I, CalleeFunc);
      } else if (findProfiledCalleeThroughTailCalls(
                     ProfiledCallee, CalledFunction, Depth + 1,
                     FoundCalleeChain, FoundMultipleCalleeChains)) {
        // findProfiledCalleeThroughTailCalls should not have returned
        // true if FoundMultipleCalleeChains.
        assert(!FoundMultipleCalleeChains);
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
          return false;
        }
        FoundSingleCalleeChain = true;
        SaveCallsiteInfo(&I, CalleeFunc);
      } else if (FoundMultipleCalleeChains)
        return false;
    }
  }

  return FoundSingleCalleeChain;
}
const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *Call) {
  auto *CB = dyn_cast<CallBase>(Call);
  if (!CB->getCalledOperand() || CB->isIndirectCall())
    return nullptr;
  auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
  auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
  if (Alias)
    return dyn_cast<Function>(Alias->getAliasee());
  return dyn_cast<Function>(CalleeVal);
}

bool ModuleCallsiteContextGraph::calleeMatchesFunc(
    Instruction *Call, const Function *Func, const Function *CallerFunc,
    std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
  auto *CB = dyn_cast<CallBase>(Call);
  if (!CB->getCalledOperand() || CB->isIndirectCall())
    return false;
  auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
  auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
  if (CalleeFunc == Func)
    return true;
  auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
  if (Alias && Alias->getAliasee() == Func)
    return true;

  // Recursively search for the profiled callee through tail calls starting
  // with the actual Callee. The discovered tail call chain is saved in
  // FoundCalleeChain, and we will fixup the graph to include these callsites
  // after returning.
  unsigned Depth = 1;
  bool FoundMultipleCalleeChains = false;
  if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth,
                                          FoundCalleeChain,
                                          FoundMultipleCalleeChains)) {
    LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: "
                      << Func->getName() << " from " << CallerFunc->getName()
                      << " that actually called " << CalleeVal->getName()
                      << (FoundMultipleCalleeChains
                              ? " (found multiple possible chains)"
                              : "")
                      << "\n");
    if (FoundMultipleCalleeChains)
      FoundProfiledCalleeNonUniquelyCount++;
    return false;
  }

  return true;
}

bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
                                            Instruction *Call2) {
  auto *CB1 = cast<CallBase>(Call1);
  if (!CB1->getCalledOperand() || CB1->isIndirectCall())
    return false;
  auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
  auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
  auto *CB2 = cast<CallBase>(Call2);
  if (!CB2->getCalledOperand() || CB2->isIndirectCall())
    return false;
  auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
  auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
  return CalleeFunc1 == CalleeFunc2;
}
bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
    ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
    std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
    bool &FoundMultipleCalleeChains) {
  // Stop recursive search if we have already explored the maximum specified
  // depth.
  if (Depth > TailCallSearchDepth)
    return false;

  auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) {
    // Make a CallsiteInfo for each discovered callee, if one hasn't already
    // been synthesized.
    if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
        !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
      // StackIds is empty (we don't have debug info available in the index
      // for these callsites).
      FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
          std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
    CallsiteInfo *NewCallsiteInfo =
        FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get();
    FoundCalleeChain.push_back({NewCallsiteInfo, FS});
  };

  // Look for tail calls in this function, and check if they either call the
  // profiled callee directly, or indirectly (via a recursive search).
  // Only succeed if there is a single unique tail call chain found between
  // the profiled caller and callee, otherwise we could perform incorrect
  // cloning.
  bool FoundSingleCalleeChain = false;
  for (auto &S : CurCallee.getSummaryList()) {
    if (!GlobalValue::isLocalLinkage(S->linkage()) &&
        !isPrevailing(CurCallee.getGUID(), S.get()))
      continue;
    auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
    if (!FS)
      continue;
    auto FSVI = CurCallee;
    auto *AS = dyn_cast<AliasSummary>(S.get());
    if (AS)
      FSVI = AS->getAliaseeVI();
    for (auto &CallEdge : FS->calls()) {
      if (!CallEdge.second.hasTailCall())
        continue;
      if (CallEdge.first == ProfiledCallee) {
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
          return false;
        }
        FoundSingleCalleeChain = true;
        FoundProfiledCalleeCount++;
        FoundProfiledCalleeDepth += Depth;
        if (Depth > FoundProfiledCalleeMaxDepth)
          FoundProfiledCalleeMaxDepth = Depth;
        CreateAndSaveCallsiteInfo(CallEdge.first, FS);
        // Add FS to FSToVIMap in case it isn't already there.
        assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
        FSToVIMap[FS] = FSVI;
      } else if (findProfiledCalleeThroughTailCalls(
                     ProfiledCallee, CallEdge.first, Depth + 1,
                     FoundCalleeChain, FoundMultipleCalleeChains)) {
        // findProfiledCalleeThroughTailCalls should not have returned
        // true if FoundMultipleCalleeChains.
        assert(!FoundMultipleCalleeChains);
        if (FoundSingleCalleeChain) {
          FoundMultipleCalleeChains = true;
          return false;
        }
        FoundSingleCalleeChain = true;
        CreateAndSaveCallsiteInfo(CallEdge.first, FS);
        // Add FS to FSToVIMap in case it isn't already there.
        assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
        FSToVIMap[FS] = FSVI;
      } else if (FoundMultipleCalleeChains)
        return false;
    }
  }

  return FoundSingleCalleeChain;
}
const FunctionSummary *
IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
  ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call)->Callee;
  if (Callee.getSummaryList().empty())
    return nullptr;
  return dyn_cast<FunctionSummary>(
      Callee.getSummaryList()[0]->getBaseObject());
}

bool IndexCallsiteContextGraph::calleeMatchesFunc(
    IndexCall &Call, const FunctionSummary *Func,
    const FunctionSummary *CallerFunc,
    std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
  ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call)->Callee;
  // If there is no summary list then this is a call to an externally defined
  // symbol.
  AliasSummary *Alias =
      Callee.getSummaryList().empty()
          ? nullptr
          : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
  assert(FSToVIMap.count(Func));
  auto FuncVI = FSToVIMap[Func];
  if (Callee == FuncVI ||
      // If callee is an alias, check the aliasee, since only function summary
      // base objects will contain the stack node summaries and thus get a
      // context node.
      (Alias && Alias->getAliaseeVI() == FuncVI))
    return true;

  // Recursively search for the profiled callee through tail calls starting
  // with the actual Callee. The discovered tail call chain is saved in
  // FoundCalleeChain, and we will fixup the graph to include these callsites
  // after returning.
  unsigned Depth = 1;
  bool FoundMultipleCalleeChains = false;
  if (!findProfiledCalleeThroughTailCalls(
          FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
    LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI
                      << " from " << FSToVIMap[CallerFunc]
                      << " that actually called " << Callee
                      << (FoundMultipleCalleeChains
                              ? " (found multiple possible chains)"
                              : "")
                      << "\n");
    if (FoundMultipleCalleeChains)
      FoundProfiledCalleeNonUniquelyCount++;
    return false;
  }

  return true;
}

bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
  ValueInfo Callee1 = dyn_cast_if_present<CallsiteInfo *>(Call1)->Callee;
  ValueInfo Callee2 = dyn_cast_if_present<CallsiteInfo *>(Call2)->Callee;
  return Callee1 == Callee2;
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
    const {
  print(dbgs());
  dbgs() << "\n";
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
    raw_ostream &OS) const {
  OS << "Node " << this << "\n";
  OS << "\t";
  printCall(OS);
  if (Recursive)
    OS << " (recursive)";
  OS << "\n";
  if (!MatchingCalls.empty()) {
    OS << "\tMatchingCalls:\n";
    for (auto &MatchingCall : MatchingCalls) {
      OS << "\t";
      MatchingCall.print(OS);
      OS << "\n";
    }
  }
  OS << "\tNodeId: " << NodeId << "\n";
  OS << "\tContextIds:";
  // Make a copy of the computed context ids that we can sort for stability.
  auto ContextIds = getContextIds();
  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
  std::sort(SortedIds.begin(), SortedIds.end());
  for (auto Id : SortedIds)
    OS << " " << Id;
  OS << "\n";
  OS << "\tCalleeEdges:\n";
  for (auto &Edge : CalleeEdges)
    OS << "\t\t" << *Edge << " (Callee NodeId: " << Edge->Callee->NodeId
       << ")\n";
  OS << "\tCallerEdges:\n";
  for (auto &Edge : CallerEdges)
    OS << "\t\t" << *Edge << " (Caller NodeId: " << Edge->Caller->NodeId
       << ")\n";
  if (!Clones.empty()) {
    OS << "\tClones: ";
    bool First = true;
    for (auto *C : Clones) {
      if (!First)
        OS << ", ";
      First = false;
      OS << C << " NodeId: " << C->NodeId;
    }
    OS << "\n";
  } else if (CloneOf) {
    OS << "\tClone of " << CloneOf << " NodeId: " << CloneOf->NodeId << "\n";
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
    const {
  print(dbgs());
  dbgs() << "\n";
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
    raw_ostream &OS) const {
  OS << "Edge from Callee " << Callee << " to Caller: " << Caller
     << (IsBackedge ? " (BE)" : "")
     << " AllocTypes: " << getAllocTypeString(AllocTypes);
  OS << " ContextIds:";
  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
  std::sort(SortedIds.begin(), SortedIds.end());
  for (auto Id : SortedIds)
    OS << " " << Id;
}
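// For reference, the print routines above emit blocks of roughly this shape
// (addresses and ids illustrative only; details may vary by LLVM version):
//   Node 0x55d0c0e8a010
//           <call description or "null call"> (recursive)
//           NodeId: 4
//           AllocTypes: NotColdCold
//           ContextIds: 1 2 5
//           CalleeEdges:
//                   Edge from Callee 0x... to Caller: 0x... AllocTypes: Cold ContextIds: 1 5 (Callee NodeId: 9)
//           CallerEdges:
//                   Edge from Callee 0x... to Caller: 0x... (BE) AllocTypes: NotCold ContextIds: 2 (Caller NodeId: 7)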
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
  print(dbgs());
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
    raw_ostream &OS) const {
  OS << "Callsite Context Graph:\n";
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    // Skip nodes that were removed from the graph.
    if (Node->isRemoved())
      continue;
    Node->print(OS);
    OS << "\n";
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
    raw_ostream &OS) const {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    if (Node->isRemoved())
      continue;
    if (!Node->IsAllocation)
      continue;
    DenseSet<uint32_t> ContextIds = Node->getContextIds();
    auto AllocTypeFromCall = getAllocationCallType(Node->Call);
    std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
    std::sort(SortedIds.begin(), SortedIds.end());
    for (auto Id : SortedIds) {
      auto TypeI = ContextIdToAllocationType.find(Id);
      assert(TypeI != ContextIdToAllocationType.end());
      auto CSI = ContextIdToContextSizeInfos.find(Id);
      if (CSI != ContextIdToContextSizeInfos.end()) {
        for (auto &Info : CSI->second) {
          OS << "MemProf hinting: "
             << getAllocTypeString((uint8_t)TypeI->second)
             << " full allocation context " << Info.FullStackId
             << " with total size " << Info.TotalSize << " is "
             << getAllocTypeString(Node->AllocTypes) << " after cloning";
          if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
            OS << " marked " << getAllocTypeString((uint8_t)AllocTypeFromCall)
               << " due to cold byte percent";
          // Print the internal context id to aid debugging and visualization.
          OS << " (context id " << Id << ")";
          OS << "\n";
        }
      }
    }
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  for (const auto Node : nodes<GraphType>(this)) {
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
    for (auto &Edge : Node->CallerEdges)
      checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
  using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
  // ... (node iterator support mapping NodeOwner entries to NodeRef)

  static NodeRef getEntryNode(GraphType G) {
    return G->NodeOwner.begin()->get();
  }

  using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
  static const ContextNode<DerivedCCG, FuncTy, CallTy> *
  GetCallee(const EdgePtrTy &P) {
    return P->Callee;
  }
  // ... (child iterator support mapping callee edges to their callee nodes)
};
template <typename DerivedCCG, typename FuncTy, typename CallTy>
struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
    : public DefaultDOTGraphTraits {
  // ...
  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
  // ...

  static std::string getNodeLabel(NodeRef Node, GraphType G) {
    std::string LabelString =
        (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
         Twine(Node->OrigStackOrAllocId))
            .str();
    LabelString += "\n";
    if (Node->hasCall()) {
      auto Func = G->NodeToCallingFunc.find(Node);
      assert(Func != G->NodeToCallingFunc.end());
      LabelString +=
          G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
      for (auto &MatchingCall : Node->MatchingCalls) {
        LabelString += "\n";
        LabelString += G->getLabel(Func->second, MatchingCall.call(),
                                   MatchingCall.cloneNo());
      }
    } else {
      LabelString += "null call";
      if (Node->Recursive)
        LabelString += " (recursive)";
      else
        LabelString += " (external)";
    }
    return LabelString;
  }

  static std::string getNodeAttributes(NodeRef Node, GraphType) {
    auto ContextIds = Node->getContextIds();
    // If highlighting is enabled, check whether this node carries the alloc
    // or context id of interest.
    bool Highlight = false;
    // ...
    std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) +
                                   " " + getContextIds(ContextIds) + "\"")
                                      .str();
    // Default fontsize is 14.
    if (Highlight)
      AttributeString += ",fontsize=\"30\"";
    AttributeString +=
        (Twine(",fillcolor=\"") + getColor(Node->AllocTypes, Highlight) + "\"")
            .str();
    if (Node->CloneOf) {
      AttributeString += ",color=\"blue\"";
      AttributeString += ",style=\"filled,bold,dashed\"";
    } else
      AttributeString += ",style=\"filled\"";
    return AttributeString;
  }

  static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
                                       GraphType) {
    auto &Edge = *(ChildIter.getCurrent());
    // If highlighting is enabled, check whether this edge carries the alloc
    // or context id of interest.
    bool Highlight = false;
    // ...
    auto Color = getColor(Edge->AllocTypes, Highlight);
    std::string AttributeString =
        (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
         // fillcolor is the arrow head, color is the line itself.
         Twine(",fillcolor=\"") + Color + "\"" + Twine(",color=\"") + Color +
         "\"")
            .str();
    if (Edge->IsBackedge)
      AttributeString += ",style=\"dotted\"";
    // Make highlighted edges more prominent.
    if (Highlight)
      AttributeString += ",penwidth=\"2.0\",weight=\"2\"";
    return AttributeString;
  }

  // Since the NodeOwner list includes nodes that are no longer connected to
  // the graph, skip them here.
  static bool isNodeHidden(NodeRef Node, GraphType) {
    if (Node->isRemoved())
      return true;
    // ... (also hide nodes outside the requested -memprof-dot-scope)
    return false;
  }

private:
  static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
    std::string IdString = "ContextIds:";
    if (ContextIds.size() < 100) {
      std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
      std::sort(SortedIds.begin(), SortedIds.end());
      for (auto Id : SortedIds)
        IdString += (" " + Twine(Id)).str();
    } else {
      IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
    }
    return IdString;
  }

  static std::string getColor(uint8_t AllocTypes, bool Highlight) {
    // If highlighting is disabled, or this node/edge is itself highlighted,
    // use the saturated colors; otherwise use the lighter variants.
    if (AllocTypes == (uint8_t)AllocationType::NotCold)
      // "brown1" is actually a lighter red.
      return !DoHighlight || Highlight ? "brown1" : "lightpink";
    if (AllocTypes == (uint8_t)AllocationType::Cold)
      return !DoHighlight || Highlight ? "cyan" : "lightskyblue";
    if (AllocTypes ==
        ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
      return Highlight ? "magenta" : "mediumorchid1";
    return "gray";
  }

  static std::string getNodeId(NodeRef Node) {
    std::stringstream SStream;
    SStream << std::hex << "N0x" << (unsigned long long)Node;
    std::string Result = SStream.str();
    return Result;
  }

  // Whether to highlight (rather than filter to) the nodes and edges feeding
  // the requested alloc or context id.
  static bool DoHighlight;
};
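// The dot export is driven by the options declared at the top of this file; a
// typical debugging invocation (illustrative paths and ids) might look like:
//   opt -passes=memprof-context-disambiguation \
//       -memprof-export-to-dot -memprof-dot-file-path-prefix=/tmp/ \
//       -memprof-dot-scope=alloc -memprof-dot-alloc-id=4 module.bc -o /dev/null
// which writes files such as /tmp/ccg.postbuild.dot (see exportToDot below)
// that can be rendered with graphviz, e.g. "dot -Tpdf".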
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool DOTGraphTraits<
    const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>::DoHighlight =
    false;

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
    std::string Label) const {
  // ... (configure DoHighlight from the -memprof-dot-* options)
  WriteGraph(this, "", false, Label,
             DotFilePathPrefix + "ccg." + Label + ".dot");
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
    const std::shared_ptr<ContextEdge> &Edge,
    DenseSet<uint32_t> ContextIdsToMove) {
  ContextNode *Node = Edge->Callee;
  assert(NodeToCallingFunc.count(Node));
  ContextNode *Clone =
      createNewNode(Node->IsAllocation, NodeToCallingFunc[Node], Node->Call);
  Node->addClone(Clone);
  Clone->MatchingCalls = Node->MatchingCalls;
  moveEdgeToExistingCalleeClone(Edge, Clone, /*NewClone=*/true,
                                ContextIdsToMove);
  return Clone;
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                  ContextNode *NewCallee, bool NewClone,
                                  DenseSet<uint32_t> ContextIdsToMove) {
  // NewCallee and Edge's current callee must be clones of the same original
  // node (Edge's current callee may be the original node too).
  assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());

  bool EdgeIsRecursive = Edge->Callee == Edge->Caller;

  ContextNode *OldCallee = Edge->Callee;

  // We might already have an edge to the new callee from earlier cloning for a
  // different allocation. If one exists we will reuse it.
  auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);

  // If we are moving all of Edge's ids, then just move the whole Edge.
  // Otherwise only move the specified subset, to a new edge if needed.
  if (ContextIdsToMove.empty())
    ContextIdsToMove = Edge->getContextIds();

  if (Edge->getContextIds().size() == ContextIdsToMove.size()) {
    // Moving the whole Edge.
    NewCallee->AllocTypes |= Edge->AllocTypes;
    if (ExistingEdgeToNewCallee) {
      // Since we already have an edge to NewCallee, simply move the ids
      // onto it, and remove the existing Edge.
      ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
      ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
      assert(Edge->ContextIds == ContextIdsToMove);
      removeEdgeFromGraph(Edge.get());
    } else {
      // Otherwise just reconnect Edge to NewCallee.
      Edge->Callee = NewCallee;
      NewCallee->CallerEdges.push_back(Edge);
      // Remove it from callee where it was previously connected.
      OldCallee->eraseCallerEdge(Edge.get());
    }
  } else {
    // Only moving a subset of Edge's ids.
    auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
    if (ExistingEdgeToNewCallee) {
      // Since we already have an edge to NewCallee, simply move the ids
      // onto it.
      ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
      ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
    } else {
      // Otherwise, create a new edge to NewCallee for the ids being moved.
      auto NewEdge = std::make_shared<ContextEdge>(
          NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
      Edge->Caller->CalleeEdges.push_back(NewEdge);
      NewCallee->CallerEdges.push_back(NewEdge);
    }
    // In either case, update the alloc types on NewCallee, and remove the
    // moved ids from the original Edge and recompute its alloc type.
    NewCallee->AllocTypes |= CallerEdgeAllocType;
    set_subtract(Edge->getContextIds(), ContextIdsToMove);
    Edge->AllocTypes = computeAllocType(Edge->ContextIds);
  }
  // Now walk the old callee node's callee edges and move Edge's context ids
  // over to the corresponding edge into the clone (which is created here if
  // this is a newly created clone).
  for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
    ContextNode *CalleeToUse = OldCalleeEdge->Callee;
    // If this is a direct recursion edge, use NewCallee (the clone) as the
    // callee as well, so that any edge updated/created here is also direct
    // recursive.
    if (CalleeToUse == OldCallee) {
      // If this is a recursive edge, and we already moved a recursive edge,
      // skip it.
      if (EdgeIsRecursive)
        continue;
      CalleeToUse = NewCallee;
    }
    // The context ids moving to the new callee are the subset of this edge's
    // ids and the ids on the caller edge being moved.
    DenseSet<uint32_t> EdgeContextIdsToMove =
        set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);
    set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
    OldCalleeEdge->AllocTypes =
        computeAllocType(OldCalleeEdge->getContextIds());
    if (!NewClone) {
      // Update context ids / alloc type on the corresponding edge from
      // NewCallee, if it still exists (it may have been removed as a none
      // type edge when reusing an existing clone).
      if (auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
        NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
        NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
        continue;
      }
    }
    auto NewEdge = std::make_shared<ContextEdge>(
        CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
        EdgeContextIdsToMove);
    NewCallee->CalleeEdges.push_back(NewEdge);
    NewEdge->Callee->CallerEdges.push_back(NewEdge);
  }
  // Recompute the node alloc type now that its callee edges have been
  // updated (since we will compute from those edges).
  OldCallee->AllocTypes = OldCallee->computeAllocType();
  // OldCallee alloc type should be None iff its context id set is now empty.
  assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
         OldCallee->emptyContextIds());
  if (VerifyCCG) {
    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
    for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
    for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
                                            /*CheckEdges=*/false);
  }
}
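// Worked example (illustrative ids): suppose Edge carries ContextIds {1,2,3}
// and ContextIdsToMove = {2,3} is moved to clone NewCallee. The subset path
// above creates (or reuses) a caller edge into NewCallee with ids {2,3},
// leaves the original Edge with {1}, and then, for each of OldCallee's callee
// edges, moves the intersection with {2,3} onto the corresponding edge out of
// NewCallee. Every context id set thus stays partitioned across the clones.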
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
                              ContextNode *NewCaller) {
  auto *OldCallee = Edge->Callee;
  auto *NewCallee = OldCallee;
  // If this edge was direct recursive, make any new/updated edge also direct
  // recursive to NewCaller.
  bool Recursive = Edge->Caller == Edge->Callee;
  if (Recursive)
    NewCallee = NewCaller;

  ContextNode *OldCaller = Edge->Caller;
  OldCaller->eraseCalleeEdge(Edge.get());

  // We might already have an edge to the new caller. If one exists we will
  // reuse it.
  auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);

  if (ExistingEdgeToNewCaller) {
    // Since we already have an edge to NewCaller, simply move the ids
    // onto it, and remove the existing Edge.
    ExistingEdgeToNewCaller->getContextIds().insert_range(
        Edge->getContextIds());
    ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
    Edge->ContextIds.clear();
    Edge->AllocTypes = (uint8_t)AllocationType::None;
    OldCallee->eraseCallerEdge(Edge.get());
  } else {
    // Otherwise just reconnect Edge to NewCaller.
    Edge->Caller = NewCaller;
    NewCaller->CalleeEdges.push_back(Edge);
    if (Recursive) {
      assert(NewCallee == NewCaller);
      // If the edge was direct recursive, the callee becomes the new caller
      // as well.
      Edge->Callee = NewCallee;
      NewCallee->CallerEdges.push_back(Edge);
      OldCallee->eraseCallerEdge(Edge.get());
    }
  }

  NewCaller->AllocTypes |= Edge->AllocTypes;

  // A caller-less NewCaller indicates a newly created node.
  bool IsNewNode = NewCaller->CallerEdges.empty();
  // ...
  for (auto &OldCallerEdge : OldCaller->CallerEdges) {
    auto OldCallerCaller = OldCallerEdge->Caller;
    // The context ids moving to the new caller are the subset of this edge's
    // ids and the ids on the callee edge being moved.
    DenseSet<uint32_t> EdgeContextIdsToMove = set_intersection(
        OldCallerEdge->getContextIds(), Edge->getContextIds());
    if (OldCaller == OldCallerCaller) {
      OldCallerCaller = NewCaller;
      // ...
    }
    set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
    OldCallerEdge->AllocTypes =
        computeAllocType(OldCallerEdge->getContextIds());
    // In this function we expect that any pre-existing caller edges on
    // NewCaller correspond to other callers of OldCaller.
    auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
    assert(IsNewNode || ExistingCallerEdge || OldCallerEdge->isRemoved());
    if (ExistingCallerEdge) {
      ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
      ExistingCallerEdge->AllocTypes |=
          computeAllocType(EdgeContextIdsToMove);
      continue;
    }
    auto NewEdge = std::make_shared<ContextEdge>(
        NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
        EdgeContextIdsToMove);
    NewCaller->CallerEdges.push_back(NewEdge);
    NewEdge->Caller->CalleeEdges.push_back(NewEdge);
  }
  // Recompute the node alloc type now that its caller edges have been
  // updated.
  OldCaller->AllocTypes = OldCaller->computeAllocType();
  // OldCaller alloc type should be None iff its context id set is now empty.
  assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
         OldCaller->emptyContextIds());
  if (VerifyCCG) {
    checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller, /*CheckEdges=*/false);
    checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller, /*CheckEdges=*/false);
    for (const auto &OldCallerEdge : OldCaller->CallerEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
                                            /*CheckEdges=*/false);
    for (const auto &NewCallerEdge : NewCaller->CallerEdges)
      checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
                                            /*CheckEdges=*/false);
  }
}
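// This routine is the caller-side dual of moveEdgeToExistingCalleeClone: the
// moved edge drags the relevant subset of each of OldCaller's caller edges
// along with it, so that the context id sets remain consistent along every
// path from allocation to root after the move.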
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    recursivelyRemoveNoneTypeCalleeEdges(
        ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
  auto Inserted = Visited.insert(Node);
  if (!Inserted.second)
    return;

  removeNoneTypeCalleeEdges(Node);

  for (auto *Clone : Node->Clones)
    recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);

  // The recursive call may remove some of this Node's caller edges.
  // Iterate over a copy and skip any that were removed.
  auto CallerEdges = Node->CallerEdges;
  for (auto &Edge : CallerEdges) {
    // Skip any that have been removed by an earlier recursive call.
    if (Edge->isRemoved()) {
      assert(!is_contained(Node->CallerEdges, Edge));
      continue;
    }
    recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
  // Mark backedges via a DFS along callee edges, starting from each node in
  // the graph that has no callers (a root).
  DenseSet<const ContextNode *> Visited;
  DenseSet<const ContextNode *> CurrentStack;
  for (auto &Entry : NonAllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    if (Node->isRemoved())
      continue;
    // It is a root if it doesn't have callers.
    if (!Node->CallerEdges.empty())
      continue;
    markBackedges(Node, Visited, CurrentStack);
    assert(CurrentStack.empty());
  }
}

// Given the root of a DFS, mark backedges in the graph using a DFS along
// callee edges, tracking the nodes currently on the recursion stack.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    DenseSet<const ContextNode *> &CurrentStack) {
  auto I = Visited.insert(Node);
  // We should only call this for unvisited nodes.
  assert(I.second);
  (void)I;
  for (auto &CalleeEdge : Node->CalleeEdges) {
    auto *Callee = CalleeEdge->Callee;
    if (Visited.count(Callee)) {
      // Since this was already visited we need to check if it is currently on
      // the recursive stack, in which case it is a backedge.
      if (CurrentStack.count(Callee))
        CalleeEdge->IsBackedge = true;
      continue;
    }
    CurrentStack.insert(Callee);
    markBackedges(Callee, Visited, CurrentStack);
    CurrentStack.erase(Callee);
  }
}
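// The IsBackedge flag set here is consulted during identifyClones below:
// cloning along a backedge is deferred (counted by the DeferredBackedges
// statistic) until the non-backedge callers have been processed, which avoids
// chasing cycles indefinitely while still allowing recursive contexts to be
// disambiguated.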
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    Visited.clear();
    identifyClones(Entry.second, Visited, Entry.second->getContextIds());
  }
  Visited.clear();
  for (auto &Entry : AllocationCallToContextNodeMap)
    recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);
  if (VerifyCCG)
    check();
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    const DenseSet<uint32_t> &AllocContextIds) {
  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
  assert(!Node->CloneOf);

  // If Node has a null call, then either it wasn't found in the module
  // (regular LTO) or summary index (ThinLTO), or there were other conditions
  // blocking cloning (e.g. recursion, calls multiple targets, etc). Return
  // here so that we don't try to recursively clone callers, which isn't
  // useful at least for this node.
  if (!Node->hasCall())
    return;

  // ... (mark Node visited, and return early if already unambiguous)

  {
    // Make a copy since the recursive call to identifyClones may delete the
    // current edge from the CallerEdges vector.
    auto CallerEdges = Node->CallerEdges;
    for (auto &Edge : CallerEdges) {
      // Skip any that have been removed by an earlier recursive call.
      if (Edge->isRemoved()) {
        assert(!is_contained(Node->CallerEdges, Edge));
        continue;
      }
      // Defer backedges; they are handled further below.
      if (Edge->IsBackedge) {
        // We should only mark these if cloning recursive contexts, where we
        // need to do this deferral.
        assert(CloneRecursiveContexts);
        continue;
      }
      // Ignore any caller we previously visited via another edge.
      if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf)
        identifyClones(Edge->Caller, Visited, AllocContextIds);
    }
  }

  // ... (return if Node no longer needs cloning)

  // Try to keep the original version as alloc type NotCold, so that cases
  // with indirect calls or other unknown calls to the original function get
  // the default behavior. Do this by sorting Node's caller edges by alloc
  // type, giving Cold edges the highest priority so they are cloned first.
  const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
                                               /*Cold*/ 1,
                                               /*NotColdCold*/ 2};
  llvm::stable_sort(Node->CallerEdges,
                    [&](const std::shared_ptr<ContextEdge> &A,
                        const std::shared_ptr<ContextEdge> &B) {
                      // Edges with non-empty context ids sort before those
                      // with empty ones.
                      if (A->ContextIds.empty())
                        return false;
                      if (B->ContextIds.empty())
                        return true;
                      if (A->AllocTypes == B->AllocTypes)
                        // Use the first context id for each edge as a
                        // tie-breaker.
                        return *A->ContextIds.begin() < *B->ContextIds.begin();
                      return AllocTypeCloningPriority[A->AllocTypes] <
                             AllocTypeCloningPriority[B->AllocTypes];
                    });

  assert(Node->AllocTypes != (uint8_t)AllocationType::None);

  DenseSet<uint32_t> RecursiveContextIds;
  // If we are allowing recursive callsites, but have disabled recursive
  // contexts, look for context ids that show up in multiple caller edges.
  if (AllowRecursiveCallsites && !CloneRecursiveContexts) {
    DenseSet<uint32_t> AllCallerContextIds;
    for (auto &CE : Node->CallerEdges) {
      // Resize to the largest set of caller context ids, since we know the
      // final set will be at least that large.
      AllCallerContextIds.reserve(CE->getContextIds().size());
      for (auto Id : CE->getContextIds())
        if (!AllCallerContextIds.insert(Id).second)
          RecursiveContextIds.insert(Id);
    }
  }

  // Iterate until we find no more opportunities for disambiguating the alloc
  // types via cloning. In most cases this loop will terminate once the Node
  // has a single allocation type, in which case no more cloning is needed.
  // Iterate over a copy of Node's caller edges, since we may need to remove
  // edges in the moveEdgeTo* methods, and this simplifies the handling.
  auto CallerEdges = Node->CallerEdges;
  for (auto &CallerEdge : CallerEdges) {
    // Skip any that have been removed by an earlier recursive call.
    if (CallerEdge->isRemoved()) {
      assert(!is_contained(Node->CallerEdges, CallerEdge));
      continue;
    }
    assert(CallerEdge->Callee == Node);

    // See if cloning the prior caller edge left this node with a single alloc
    // type or a single caller; in that case no more cloning is needed.
    if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
      break;

    // If the caller was not matched to a call in the IR/summary, there is no
    // point in cloning for it as we cannot update that call.
    if (!CallerEdge->Caller->hasCall())
      continue;

    // Only need to process the ids along this edge pertaining to the given
    // allocation.
    auto CallerEdgeContextsForAlloc =
        set_intersection(CallerEdge->getContextIds(), AllocContextIds);
    if (!RecursiveContextIds.empty())
      CallerEdgeContextsForAlloc =
          set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);
    if (CallerEdgeContextsForAlloc.empty())
      continue;

    auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);

    // Compute the node callee edge alloc types corresponding to the context
    // ids for this caller edge.
    std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
    CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
    for (auto &CalleeEdge : Node->CalleeEdges)
      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
          CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));

    // Don't clone if doing so will not disambiguate any alloc types amongst
    // caller edges (including the callee edges that would be cloned).
    assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
    assert(Node->AllocTypes != (uint8_t)AllocationType::None);
    if (!CallerEdge->IsBackedge &&
        allocTypeToUse(CallerAllocTypeForAlloc) ==
            allocTypeToUse(Node->AllocTypes) &&
        allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges))
      continue;

    if (CallerEdge->IsBackedge) {
      // Cloning along backedges was deferred in the recursion above; count
      // the deferral here.
      DeferredBackedges++;
    }

    // If this is a backedge whose caller has not yet been cloned or visited,
    // do the deferred cloning of the caller now, then re-examine this edge.
    if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
        !Visited.count(CallerEdge->Caller)) {
      const auto OrigIdCount = CallerEdge->getContextIds().size();
      // Now do the recursive cloning of this backedge's caller, which was
      // deferred earlier.
      identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
      removeNoneTypeCalleeEdges(CallerEdge->Caller);
      // See if the recursive call moved the context ids to a new edge from
      // this node to a clone of the caller, and switch to looking at that new
      // edge so that we clone Node for the new caller clone.
      bool UpdatedEdge = false;
      if (OrigIdCount > CallerEdge->getContextIds().size()) {
        for (auto E : Node->CallerEdges) {
          // Only interested in clones of the current edge's caller.
          if (E->Caller->CloneOf != CallerEdge->Caller)
            continue;
          // See if this edge contains any of the context ids originally on
          // the current caller edge.
          auto CallerEdgeContextsForAllocNew =
              set_intersection(CallerEdgeContextsForAlloc, E->getContextIds());
          if (CallerEdgeContextsForAllocNew.empty())
            continue;
          // Switch to the new edge and context id set.
          CallerEdge = E;
          CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
          UpdatedEdge = true;
          break;
        }
      }
      // If cloning removed this edge (and we didn't update it to a new edge
      // above), we're done with it.
      if (CallerEdge->isRemoved())
        continue;
      // Otherwise recompute the context ids for the (possibly updated) edge.
      if (!UpdatedEdge) {
        CallerEdgeContextsForAlloc = set_intersection(
            CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
        if (CallerEdgeContextsForAlloc.empty())
          continue;
      }
      // Update the other state used below.
      CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
      CalleeEdgeAllocTypesForCallerEdge.clear();
      for (auto &CalleeEdge : Node->CalleeEdges) {
        CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
            CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
      }
    }

    // First see if we can use an existing clone. Check each clone and its
    // callee edges for matching alloc types.
    ContextNode *Clone = nullptr;
    for (auto *CurClone : Node->Clones) {
      if (allocTypeToUse(CurClone->AllocTypes) !=
          allocTypeToUse(CallerAllocTypeForAlloc))
        continue;

      bool BothSingleAlloc = hasSingleAllocType(CurClone->AllocTypes) &&
                             hasSingleAllocType(CallerAllocTypeForAlloc);
      // The above check should mean that if both have single alloc types
      // they are equal.
      assert(!BothSingleAlloc ||
             CurClone->AllocTypes == CallerAllocTypeForAlloc);

      // If either both have a single (equal) alloc type, or the clone's
      // callee edges have the same alloc types as those for the current
      // allocation on Node's callee edges, we can reuse this clone.
      if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
                                 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
        Clone = CurClone;
        break;
      }
    }

    // The edge iterator is adjusted when we move the CallerEdge to the clone.
    if (Clone)
      moveEdgeToExistingCalleeClone(CallerEdge, Clone, /*NewClone=*/false,
                                    CallerEdgeContextsForAlloc);
    else
      Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);

    // Sanity check that no alloc types on the clone or its edges are None.
    assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
  }

  // ...

  // Sanity check that no alloc types on node or edges are None.
  assert(Node->AllocTypes != (uint8_t)AllocationType::None);

  if (VerifyNodes)
    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
}
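// Cloning priority rationale: with the priorities {None 3, NotCold 4, Cold 1,
// NotColdCold 2} above, unambiguously Cold caller edges are peeled off into
// clones first, then mixed edges, leaving the NotCold edges attached to the
// original node. Any unknown caller (e.g. an unprofiled indirect call) thus
// falls through to the original, default (not cold) version.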
void ModuleCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
                                                      AllocationType AllocType) {
  std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
  auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
                                "memprof", AllocTypeString);
  cast<CallBase>(Call.call())->addFnAttr(A);
  OREGetter(Call.call()->getFunction())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
            << ore::NV("AllocationCall", Call.call()) << " in clone "
            << ore::NV("Caller", Call.call()->getFunction())
            << " marked with memprof allocation attribute "
            << ore::NV("Attribute", AllocTypeString));
}
void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
                                                     AllocationType AllocType) {
  auto *AI = cast<AllocInfo *>(Call.call());
  assert(AI);
  assert(AI->Versions.size() > Call.cloneNo());
  AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
}
AllocationType
ModuleCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
  const auto *CB = cast<CallBase>(Call.call());
  if (!CB->getAttributes().hasFnAttr("memprof"))
    return AllocationType::None;
  return CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
             ? AllocationType::Cold
             : AllocationType::NotCold;
}

AllocationType
IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
  const auto *AI = cast<AllocInfo *>(Call.call());
  assert(AI->Versions.size() > Call.cloneNo());
  return (AllocationType)AI->Versions[Call.cloneNo()];
}
void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                            FuncInfo CalleeFunc) {
  auto *CurF = getCalleeFunc(CallerCall.call());
  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
  if (CurF && isMemProfClone(*CurF)) {
    // If we already assigned this callsite to call a specific non-default
    // clone (i.e. not the original function, which is clone 0), ensure that
    // we aren't trying to now update it to call a different clone, which is
    // indicative of a bug in the graph or function assignment.
    auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
    if (CurCalleeCloneNo != NewCalleeCloneNo) {
      LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
                        << CurCalleeCloneNo << " now " << NewCalleeCloneNo
                        << "\n");
      MismatchedCloneAssignments++;
    }
  }
  if (NewCalleeCloneNo > 0)
    cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
  OREGetter(CallerCall.call()->getFunction())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
            << ore::NV("Call", CallerCall.call()) << " in clone "
            << ore::NV("Caller", CallerCall.call()->getFunction())
            << " assigned to call function clone "
            << ore::NV("Callee", CalleeFunc.func()));
}
void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                           FuncInfo CalleeFunc) {
  auto *CI = cast<CallsiteInfo *>(CallerCall.call());
  assert(CI &&
         "Caller cannot be an allocation which should not have profiled calls");
  assert(CI->Clones.size() > CallerCall.cloneNo());
  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
  auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
  // If we already assigned this callsite to call a specific non-default
  // clone (i.e. not the original function, which is clone 0), ensure that we
  // aren't trying to now update it to call a different clone.
  if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
    LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
                      << CurCalleeCloneNo << " now " << NewCalleeCloneNo
                      << "\n");
    MismatchedCloneAssignments++;
  }
  CurCalleeCloneNo = NewCalleeCloneNo;
}
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name) {
  // ... (get the subprogram SP and build an MDString MDName for Name)
  SP->replaceLinkageName(MDName);
  // Do the same for the subprogram declaration, if present.
  DISubprogram *Decl = SP->getDeclaration();
  if (!Decl)
    return;
  TempDISubprogram NewDecl = Decl->clone();
  NewDecl->replaceLinkageName(MDName);
  // ... (re-unique the cloned declaration and attach it to SP)
}
CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                     Instruction *>::FuncInfo
ModuleCallsiteContextGraph::cloneFunctionForCallsite(
    FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
  // Use existing LLVM facilities for cloning and obtaining Call in clone.
  ValueToValueMapTy VMap;
  auto *NewFunc = CloneFunction(Func.func(), VMap);
  std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
  assert(!Func.func()->getParent()->getFunction(Name));
  NewFunc->setName(Name);
  updateSubprogramLinkageName(NewFunc, Name);
  for (auto &Inst : CallsWithMetadataInFunc) {
    // This map always has the initial version in it.
    assert(Inst.cloneNo() == 0);
    CallMap[Inst] = {cast<CallBase>(VMap[Inst.call()]), CloneNo};
  }
  OREGetter(Func.func())
      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
            << "created clone " << ore::NV("NewFunction", NewFunc));
  return {NewFunc, CloneNo};
}
CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                     IndexCall>::FuncInfo
IndexCallsiteContextGraph::cloneFunctionForCallsite(
    FuncInfo &Func, CallInfo &Call, DenseMap<CallInfo, CallInfo> &CallMap,
    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
  // ...
  for (auto &Inst : CallsWithMetadataInFunc) {
    // This map always has the initial version in it.
    assert(Inst.cloneNo() == 0);
    if (auto *AI = dyn_cast_if_present<AllocInfo *>(Inst.call())) {
      assert(AI->Versions.size() == CloneNo);
      // We assign the allocation type later (in updateAllocationCall), just
      // add an entry for it here.
      AI->Versions.push_back(0);
    } else {
      auto *CI = dyn_cast_if_present<CallsiteInfo *>(Inst.call());
      assert(CI && CI->Clones.size() == CloneNo);
      // We assign the callee clone number later (in updateCall), just add an
      // entry for it here.
      CI->Clones.push_back(0);
    }
    CallMap[Inst] = {Inst.call(), CloneNo};
  }
  return {Func.func(), CloneNo};
}
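// Note that in the index (ThinLTO) case no IR is cloned here: only the
// summary's Versions/Clones arrays grow. The actual clone bodies, named with
// MemProfCloneSuffix (i.e. "<orig>.memprof.<N>" via getMemProfFuncName), are
// materialized later in the ThinLTO backend by createFunctionClones().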
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
  if (!MergeClones)
    return;

  // Generate a map from context id to the associated allocation node for use
  // when merging clones.
  DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    for (auto Id : Node->getContextIds())
      ContextIdToAllocationNode[Id] = Node->getOrigNode();
    for (auto *Clone : Node->Clones) {
      for (auto Id : Clone->getContextIds())
        ContextIdToAllocationNode[Id] = Clone->getOrigNode();
    }
  }

  // Post order traversal starting from allocations and their clones.
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    mergeClones(Node, Visited, ContextIdToAllocationNode);
    // Make a copy so the recursive post order traversal that may create new
    // clones doesn't mess up iteration.
    auto Clones = Node->Clones;
    for (auto *Clone : Clones)
      mergeClones(Clone, Visited, ContextIdToAllocationNode);
  }

  if (DumpCCG) {
    dbgs() << "CCG after merging:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("aftermerge");

  if (VerifyCCG)
    check();
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
  auto Inserted = Visited.insert(Node);
  if (!Inserted.second)
    return;

  // Iteratively perform the caller traversal, since merging under a node may
  // expose new callers of it (controlled by -memprof-merge-iteration).
  bool FoundUnvisited = true;
  unsigned Iters = 0;
  while (FoundUnvisited) {
    Iters++;
    FoundUnvisited = false;
    // Make a copy since the recursive call may move a caller edge to a new
    // callee, messing up the iterator.
    auto CallerEdges = Node->CallerEdges;
    for (auto CallerEdge : CallerEdges) {
      // Skip any caller edge moved onto a different callee during recursion.
      if (CallerEdge->Callee != Node)
        continue;
      // If we found an unvisited caller, possibly repeat the traversal.
      if (DoMergeIteration && !Visited.contains(CallerEdge->Caller))
        FoundUnvisited = true;
      mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
    }
  }
  TotalMergeInvokes++;
  TotalMergeIters += Iters;
  if (Iters > MaxMergeIters)
    MaxMergeIters = Iters;

  // Merge for this node after we handle its callers.
  mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
  // Ignore Node if we moved all of its contexts to clones.
  if (Node->emptyContextIds())
    return;

  // First identify groups of clones among Node's callee edges, by building a
  // map from each callee base node to the associated callee edges from Node.
  MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
      OrigNodeToCloneEdges;
  for (const auto &E : Node->CalleeEdges) {
    auto *Callee = E->Callee;
    if (!Callee->CloneOf && Callee->Clones.empty())
      continue;
    ContextNode *Base = Callee->getOrigNode();
    OrigNodeToCloneEdges[Base].push_back(E);
  }

  // Helper for callee edge sorting below. Compare edges based on the number
  // of callers of their callee, from least to most, to attempt to preserve
  // existing clones as much as possible; clones sort before originals, and
  // the first context id on each edge breaks ties.
  auto CalleeCallerEdgeLessThan = [](const std::shared_ptr<ContextEdge> &A,
                                     const std::shared_ptr<ContextEdge> &B) {
    if (A->Callee->CallerEdges.size() != B->Callee->CallerEdges.size())
      return A->Callee->CallerEdges.size() < B->Callee->CallerEdges.size();
    if (A->Callee->CloneOf && !B->Callee->CloneOf)
      return true;
    else if (!A->Callee->CloneOf && B->Callee->CloneOf)
      return false;
    return *A->ContextIds.begin() < *B->ContextIds.begin();
  };

  // Process each set of callee clones called by Node, performing the needed
  // merging.
  for (auto Entry : OrigNodeToCloneEdges) {
    // CalleeEdges is the set of edges from Node reaching callees that are
    // mutual clones of each other.
    auto &CalleeEdges = Entry.second;
    auto NumCalleeClones = CalleeEdges.size();
    // A single edge means there is no merging needed.
    if (NumCalleeClones == 1)
      continue;
    sort(CalleeEdges, CalleeCallerEdgeLessThan);

    // Next look for other caller nodes that could share this merge.
    DenseSet<ContextNode *> OtherCallersToShareMerge;
    findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
                                 OtherCallersToShareMerge);

    // Now do the merging. Identify an existing node, or create a new
    // MergeNode, during the first iteration, then move each callee edge over,
    // along with edges from the other callers identified above.
    ContextNode *MergeNode = nullptr;
    DenseMap<ContextNode *, unsigned> CallerToMoveCount;
    for (auto CalleeEdge : CalleeEdges) {
      auto *OrigCallee = CalleeEdge->Callee;
      if (!MergeNode) {
        // If the first callee has a single caller (this edge), we can simply
        // reuse it as the merge node.
        if (CalleeEdge->Callee->CallerEdges.size() == 1) {
          MergeNode = OrigCallee;
          NonNewMergedNodes++;
        }
        // If there are other callers sharing the merge, and all of the
        // original callee's caller edges are being moved, we can reuse it too.
        if (!MergeNode && !OtherCallersToShareMerge.empty()) {
          bool MoveAllCallerEdges = true;
          for (auto CalleeCallerE : OrigCallee->CallerEdges) {
            if (CalleeCallerE == CalleeEdge)
              continue;
            if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller)) {
              MoveAllCallerEdges = false;
              break;
            }
          }
          if (MoveAllCallerEdges) {
            MergeNode = OrigCallee;
            NonNewMergedNodes++;
          }
        }
      }
      // Move the current callee edge to the merge node, creating one if we
      // don't have one yet.
      if (MergeNode) {
        assert(MergeNode != OrigCallee);
        moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
                                      /*NewClone=*/false);
      } else {
        MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
        NewMergedNodes++;
      }
      // Now move all identified edges from other callers over to the merge
      // node as well.
      if (!OtherCallersToShareMerge.empty()) {
        // Make and iterate over a copy of OrigCallee's caller edges, because
        // some of these will be moved off of OrigCallee and that would mess
        // up the iteration.
        auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
        for (auto &CalleeCallerE : OrigCalleeCallerEdges) {
          if (CalleeCallerE == CalleeEdge)
            continue;
          if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller))
            continue;
          CallerToMoveCount[CalleeCallerE->Caller]++;
          moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
                                        /*NewClone=*/false);
        }
      }
      removeNoneTypeCalleeEdges(OrigCallee);
      removeNoneTypeCalleeEdges(MergeNode);
    }
  }
}
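// Merging exists to avoid a function-assignment explosion: after cloning, a
// caller that reaches N different clones of the same callee could otherwise
// force up to N function clones of the caller itself. Funneling those callee
// clones into a single merge node (tracked by the NewMergedNodes and
// NonNewMergedNodes statistics) keeps the subsequent assignFunctions() pass
// from multiplying clones across callers that don't need distinct copies.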
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    findOtherCallersToShareMerge(
        ContextNode *Node,
        std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
        DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
        DenseSet<ContextNode *> &OtherCallersToShareMerge) {
  auto NumCalleeClones = CalleeEdges.size();
  // For each other caller of these callee clones, the number of its edges
  // reaching the same set of callee clones.
  DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
  // Counts the number of other caller nodes that have edges to all of the
  // callee clones.
  unsigned PossibleOtherCallerNodes = 0;

  // We only need to look at other caller nodes if the first callee edge has
  // multiple callers (recall that the edges were sorted in ascending order of
  // caller edge counts).
  if (CalleeEdges[0]->Callee->CallerEdges.size() < 2)
    return;

  // For each callee edge: count the other caller nodes calling the same
  // callees, and collect the alloc nodes reached by contexts on each edge.
  DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
  for (auto CalleeEdge : CalleeEdges) {
    assert(CalleeEdge->Callee->CallerEdges.size() > 1);
    for (auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
      if (CalleeCallerEdges->Caller == Node) {
        assert(CalleeCallerEdges == CalleeEdge);
        continue;
      }
      OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
      // If this caller now reaches all of the callee clones, it becomes a
      // candidate.
      if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
          NumCalleeClones)
        PossibleOtherCallerNodes++;
    }
    // Collect the alloc nodes reached by contexts on each callee edge, for
    // the analysis below.
    for (auto Id : CalleeEdge->getContextIds()) {
      auto *Alloc = ContextIdToAllocationNode.lookup(Id);
      if (!Alloc) {
        // FIXME: unclonable contexts may not have an entry.
        MissingAllocForContextId++;
        continue;
      }
      CalleeEdgeToAllocNodes[CalleeEdge.get()].insert(Alloc);
    }
  }

  // Walk the callee edges again, and make sure that for each candidate caller
  // all of its edges to the callees reach the same alloc nodes.
  for (auto CalleeEdge : CalleeEdges) {
    assert(CalleeEdge->Callee->CallerEdges.size() > 1);
    // Stop if we no longer have any candidates.
    if (!PossibleOtherCallerNodes)
      break;
    auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
    // Check each other caller of this callee.
    for (auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
      // Not interested in the callee edge from Node itself.
      if (CalleeCallerE == CalleeEdge)
        continue;
      // Skip any callers that didn't have callee edges to all callee clones.
      if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
          NumCalleeClones)
        continue;
      // Make sure that each context id on this caller edge reaches an alloc
      // node also reached by this callee edge from Node.
      for (auto Id : CalleeCallerE->getContextIds()) {
        auto *Alloc = ContextIdToAllocationNode.lookup(Id);
        if (!Alloc)
          continue;
        // If not, mark this caller as ineligible by resetting its count, and
        // decrement the candidate count.
        if (!CurCalleeAllocNodes.contains(Alloc)) {
          OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
          PossibleOtherCallerNodes--;
          break;
        }
      }
    }
  }

  if (!PossibleOtherCallerNodes)
    return;

  // Build the set of other caller nodes that can use the same callee merge
  // node.
  for (auto &[OtherCaller, Count] : OtherCallersToSharedCalleeEdgeCount) {
    if (Count != NumCalleeClones)
      continue;
    OtherCallersToShareMerge.insert(OtherCaller);
  }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
  bool Changed = false;

  // Keep track of the assignment of nodes (callsites) to function clones they
  // call.
  DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;

  // Record the function version CalleeFunc called by caller node Caller.
  auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
                                        const FuncInfo &CalleeFunc) {
    assert(Caller->hasCall());
    CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
  };

  // Information for a single clone of this function.
  struct FuncCloneInfo {
    // The function clone.
    FuncInfo FuncClone;
    // Remappings of each call of interest (from original uncloned call to the
    // corresponding cloned call in this function clone).
    DenseMap<CallInfo, CallInfo> CallMap;
  };

  // Map from each node to the set of function clone numbers to which its
  // calls were not assigned, along with the calls (the node's call and its
  // matching calls) cloned into those function clones. Handled after the main
  // assignment below.
  DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
      UnassignedCallClones;

  // Walk all functions for which we saw calls with memprof metadata, and
  // handle any calls in their callsite nodes that were cloned.
  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
    FuncInfo OrigFunc(Func);
    // Map from each clone number of OrigFunc to information about that
    // function clone (the FuncInfo and call remappings).
    std::vector<FuncCloneInfo> FuncCloneInfos;
    for (auto &Call : CallsWithMetadata) {
      ContextNode *Node = getNodeForInst(Call);
      // Skip call if we do not have a node for it (all of its stack id uses
      // were either on inlined chains or pruned from the MIBs), or if we did
      // not create any clones for it.
      if (!Node || Node->Clones.empty())
        continue;
      assert(Node->hasCall() &&
             "Not having a call should have prevented cloning");

      // Track the assignment of function clones to clones of the current
      // callsite Node being handled.
      std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;

      // Assign callsite version CallsiteClone to function version FuncClone,
      // and also assign (possibly cloned) Call to CallsiteClone.
      auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
                                                CallInfo &Call,
                                                ContextNode *CallsiteClone,
                                                bool IsAlloc) {
        // Record the clone of the callsite node assigned to this function
        // clone.
        FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;

        assert(FuncCloneInfos.size() > FuncClone.cloneNo());
        DenseMap<CallInfo, CallInfo> &CallMap =
            FuncCloneInfos[FuncClone.cloneNo()].CallMap;
        CallInfo CallClone(Call);
        if (auto It = CallMap.find(Call); It != CallMap.end())
          CallClone = It->second;
        CallsiteClone->setCall(CallClone);
        // Need to do the same for all matching calls.
        for (auto &MatchingCall : Node->MatchingCalls) {
          CallInfo CallClone(MatchingCall);
          if (auto It = CallMap.find(MatchingCall); It != CallMap.end())
            CallClone = It->second;
          // Updates the call in the list.
          MatchingCall = CallClone;
        }
      };

      // Invokes moveEdgeToNewCalleeClone and also performs the necessary
      // assignment bookkeeping for the new callee clone.
      auto MoveEdgeToNewCalleeCloneAndSetUp =
          [&](const std::shared_ptr<ContextEdge> &Edge) {
            ContextNode *OrigCallee = Edge->Callee;
            ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge);
            removeNoneTypeCalleeEdges(NewClone);
            assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
            // If the callee node was already assigned to call a specific
            // function version, make sure its new clone is assigned to call
            // that same function clone.
            if (CallsiteToCalleeFuncCloneMap.count(OrigCallee))
              RecordCalleeFuncOfCallsite(
                  NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
            return NewClone;
          };

      // Keep track of the clones of callsite Node that need to be assigned to
      // function clones. This list may be expanded in the loop body below if
      // we find that additional cloning is required.
      std::deque<ContextNode *> ClonesWorklist;
      // Ignore the original Node if we moved all of its contexts to clones.
      if (!Node->emptyContextIds())
        ClonesWorklist.push_back(Node);
      ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
                            Node->Clones.end());

      // Now walk through all of the clones of this callsite Node that we
      // need, and determine the assignment to a corresponding function clone.
      unsigned NodeCloneCount = 0;
      while (!ClonesWorklist.empty()) {
        ContextNode *Clone = ClonesWorklist.front();
        ClonesWorklist.pop_front();
        NodeCloneCount++;
        if (VerifyNodes)
          checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
        // Need to create a new function clone if we have more callsite clones
        // than existing function clones, which would have been assigned to an
        // earlier clone in the list (we assign callsite clones to function
        // clones greedily).
        if (FuncCloneInfos.size() < NodeCloneCount) {
          // If this is the first callsite copy, assign to original function.
          if (NodeCloneCount == 1) {
            // No clones created for this function yet, so no callers should
            // have been assigned a function clone for this callee node.
            assert(llvm::none_of(
                Clone->CallerEdges,
                [&](const std::shared_ptr<ContextEdge> &E) {
                  return CallsiteToCalleeFuncCloneMap.count(E->Caller);
                }));
            // Initialize with empty call map, then assign Clone and its
            // callers to the original function.
            FuncCloneInfos.push_back(
                {OrigFunc, DenseMap<CallInfo, CallInfo>()});
            AssignCallsiteCloneToFuncClone(
                OrigFunc, Call, Clone,
                AllocationCallToContextNodeMap.count(Call));
            for (auto &CE : Clone->CallerEdges) {
              // Ignore any caller that does not have a recorded callsite Call.
              if (!CE->Caller->hasCall())
                continue;
              RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
            }
            continue;
          }

          // First locate which copy of OrigFunc to clone again. If a caller
          // of this callsite clone was already assigned to call a particular
          // function clone, we need to redirect all of those callers to the
          // new function clone, and update their other callees within this
          // function.
          FuncInfo PreviousAssignedFuncClone;
          auto EI = llvm::find_if(
              Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
                return CallsiteToCalleeFuncCloneMap.count(E->Caller);
              });
          bool CallerAssignedToCloneOfFunc = false;
          if (EI != Clone->CallerEdges.end()) {
            const std::shared_ptr<ContextEdge> &Edge = *EI;
            PreviousAssignedFuncClone =
                CallsiteToCalleeFuncCloneMap[Edge->Caller];
            CallerAssignedToCloneOfFunc = true;
          }

          // Clone function and save it along with the CallInfo map created
          // during cloning, which facilitates handling of cloned callers.
          DenseMap<CallInfo, CallInfo> NewCallMap;
          unsigned CloneNo = FuncCloneInfos.size();
          assert(CloneNo > 0 && "Clone 0 is the original function, which "
                                "should already exist in the map");
          FuncInfo NewFuncClone = cloneFunctionForCallsite(
              OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
          FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
          FunctionClonesAnalysis++;
          Changed = true;

          // If no caller callsites were already assigned to a clone of this
          // function, we can simply assign this clone to the new func clone
          // and update all callers to it, then skip to the next clone.
          if (!CallerAssignedToCloneOfFunc) {
            AssignCallsiteCloneToFuncClone(
                NewFuncClone, Call, Clone,
                AllocationCallToContextNodeMap.count(Call));
            for (auto &CE : Clone->CallerEdges) {
              // Ignore any caller that does not have a recorded callsite Call.
              if (!CE->Caller->hasCall())
                continue;
              RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
            }
            continue;
          }

          // We may need to do additional node cloning in this case.
          // Reassign callers of PreviousAssignedFuncClone to NewFuncClone,
          // cloning their other callees as needed. The none type edge removal
          // may remove some of this Clone's caller edges; iterate over a copy
          // and skip any that were removed.
          auto CallerEdges = Clone->CallerEdges;
          for (auto CE : CallerEdges) {
            // Skip any that have been removed on an earlier iteration.
            if (CE->isRemoved()) {
              assert(!is_contained(Clone->CallerEdges, CE));
              continue;
            }
            // Ignore any caller that does not have a recorded callsite Call.
            if (!CE->Caller->hasCall())
              continue;

            if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
                // We subsequently fall through to later handling that will
                // perform any additional cloning required for callers that
                // were previously assigned to other function clones.
                CallsiteToCalleeFuncCloneMap[CE->Caller] !=
                    PreviousAssignedFuncClone)
              continue;

            RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);

            // If we are cloning a function that was already assigned to some
            // callers, then essentially we are creating new callsite clones
            // of the other callsites in that function that are reached by
            // those callers. Clone the other callees of the current Clone's
            // callers (that were assigned to PreviousAssignedFuncClone).
            // Iterate over a copy since some of the caller's callee edges may
            // get removed.
            auto CalleeEdges = CE->Caller->CalleeEdges;
            for (auto CalleeEdge : CalleeEdges) {
              // Skip any removed on an earlier iteration when cleaning up
              // newly none type callee edges.
              if (CalleeEdge->isRemoved()) {
                assert(!is_contained(CE->Caller->CalleeEdges, CalleeEdge));
                continue;
              }
              ContextNode *Callee = CalleeEdge->Callee;
              // Skip the current callsite (we are looking for the other
              // callsites Caller calls), as well as any callee without a
              // recorded callsite Call.
              if (Callee == Clone || !Callee->hasCall())
                continue;
              // Skip direct recursive calls; we don't need or want to clone
              // the caller node again, and this loop would not behave as
              // expected if we tried.
              if (Callee == CalleeEdge->Caller)
                continue;
              ContextNode *NewClone =
                  MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
              // Moving the edge may have left some none type callee edges on
              // the original Callee.
              removeNoneTypeCalleeEdges(Callee);
              // Update the clone of the current function's callsite to call
              // the clone of this callee within the new function clone. Get
              // the original call, from which the new function clone's call
              // was cloned.
              CallInfo OrigCall(Callee->getOrigNode()->Call);
              OrigCall.setCloneNo(0);
              DenseMap<CallInfo, CallInfo> &CallMap =
                  FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
              assert(CallMap.count(OrigCall));
              CallInfo NewCall(CallMap[OrigCall]);
              assert(NewCall);
              NewClone->setCall(NewCall);
              // Need to do the same for all matching calls.
              for (auto &MatchingCall : NewClone->MatchingCalls) {
                CallInfo OrigMatchingCall(MatchingCall);
                OrigMatchingCall.setCloneNo(0);
                assert(CallMap.count(OrigMatchingCall));
                CallInfo NewCall(CallMap[OrigMatchingCall]);
                assert(NewCall);
                // Updates the call in the list.
                MatchingCall = NewCall;
              }
            }
          }
          // Fall through to the handling below to record the function for
          // this callsite clone.
        }
        // Returns the first function clone (out of those in FuncCloneInfos)
        // not yet assigned to a clone of the current callsite Node. We should
        // always have one available at this point due to the earlier cloning
        // when the FuncCloneInfos size was smaller than the clone number.
        auto FindFirstAvailFuncClone = [&]() {
          for (auto &CF : FuncCloneInfos) {
            if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
              return CF.FuncClone;
          }
          llvm_unreachable(
              "Expected an available func clone for this callsite clone");
        };

        // See if we can use an existing function clone. Walk through all
        // caller edges to see if any have already been assigned to a clone of
        // this callsite's function; different callers may need versions of
        // this function containing different mixes of callsite clones, in
        // which case additional callsite (and function) clones are created.
        std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
        FuncInfo FuncCloneAssignedToCurCallsiteClone;
        // Iterate over a copy of Clone's caller edges, since we may need to
        // remove edges in the moveEdgeTo* methods.
        auto CloneCallerEdges = Clone->CallerEdges;
        for (auto &Edge : CloneCallerEdges) {
          // Skip removed edges (e.g. direct recursive edges updated when
          // moving an edge and subsequently removed by the call to
          // removeNoneTypeCalleeEdges on the Clone).
          if (Edge->isRemoved())
            continue;
          // Ignore any caller that does not have a recorded callsite Call.
          if (!Edge->Caller->hasCall())
            continue;
          // If this caller is already assigned to call a version of OrigFunc,
          // ensure we can assign this callsite clone to that function clone.
          if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
            FuncInfo FuncCloneCalledByCaller =
                CallsiteToCalleeFuncCloneMap[Edge->Caller];
            // The function clone is unavailable if it already holds a
            // different clone of this callsite, or if the current callsite
            // clone was already assigned a different function clone while
            // handling another caller.
            if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
                 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
                     Clone) ||
                (FuncCloneAssignedToCurCallsiteClone &&
                 FuncCloneAssignedToCurCallsiteClone !=
                     FuncCloneCalledByCaller)) {
              // We need another callsite clone. Move this caller edge to an
              // existing new clone created for this function clone, or to a
              // brand new callsite clone.
              if (FuncCloneToNewCallsiteCloneMap.count(
                      FuncCloneCalledByCaller)) {
                ContextNode *NewClone =
                    FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
                moveEdgeToExistingCalleeClone(Edge, NewClone);
                // Cleanup any none type edges cloned over.
                removeNoneTypeCalleeEdges(NewClone);
              } else {
                // Create a new callsite clone.
                ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(Edge);
                FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
                    NewClone;
                // Add to the worklist to process later.
                ClonesWorklist.push_back(NewClone);
              }
              // Moving the caller edge may have left some none type callee
              // edges.
              removeNoneTypeCalleeEdges(Clone);
              continue;
            }

            // Otherwise, we can use the function clone already assigned to
            // this caller.
            if (!FuncCloneAssignedToCurCallsiteClone) {
              FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
              // Assign Clone to FuncCloneCalledByCaller.
              AssignCallsiteCloneToFuncClone(
                  FuncCloneCalledByCaller, Call, Clone,
                  AllocationCallToContextNodeMap.count(Call));
            } else
              // Nothing to do; the callsite is already calling this function
              // clone.
              assert(FuncCloneAssignedToCurCallsiteClone ==
                     FuncCloneCalledByCaller);
          } else {
            // This caller has not yet been assigned to a version of OrigFunc.
            // Do the assignment now, reusing this callsite clone's function
            // clone if it already has one.
            if (!FuncCloneAssignedToCurCallsiteClone) {
              FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
              assert(FuncCloneAssignedToCurCallsiteClone);
              // Assign Clone to the available function clone.
              AssignCallsiteCloneToFuncClone(
                  FuncCloneAssignedToCurCallsiteClone, Call, Clone,
                  AllocationCallToContextNodeMap.count(Call));
            } else
              assert(FuncCloneToCurNodeCloneMap
                         [FuncCloneAssignedToCurCallsiteClone] == Clone);
            // Update the caller to record the function version it calls.
            RecordCalleeFuncOfCallsite(Edge->Caller,
                                       FuncCloneAssignedToCurCallsiteClone);
          }
        }
        // If we didn't assign a function clone to this callsite clone yet,
        // e.g. because none of its callers has a non-null call, do the
        // assignment here.
        if (!FuncCloneAssignedToCurCallsiteClone) {
          FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
          assert(FuncCloneAssignedToCurCallsiteClone &&
                 "No available func clone for this callsite clone");
          AssignCallsiteCloneToFuncClone(
              FuncCloneAssignedToCurCallsiteClone, Call, Clone,
              AllocationCallToContextNodeMap.contains(Call));
        }
      }
      if (VerifyCCG) {
        checkNode<DerivedCCG, FuncTy, CallTy>(Node);
        for (const auto &PE : Node->CalleeEdges)
          checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
        for (const auto &CE : Node->CallerEdges)
          checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
        for (auto *Clone : Node->Clones) {
          checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
          for (const auto &PE : Clone->CalleeEdges)
            checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
          for (const auto &CE : Clone->CallerEdges)
            checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
        }
      }
    }
    // No need to check for unassigned calls if there is only one (the
    // original) function "clone".
    if (FuncCloneInfos.size() < 2)
      continue;

    // Record calls in function clones that were not assigned to any callsite
    // clone, so that they can be updated to call the correct callee clone
    // later on (see UpdateCalls below).
    for (auto &Call : CallsWithMetadata) {
      ContextNode *Node = getNodeForInst(Call);
      if (!Node || !Node->hasCall() || Node->emptyContextIds())
        continue;
      // If every function clone has a callsite clone for this node, there is
      // nothing left to record.
      if (Node->Clones.size() + 1 >= FuncCloneInfos.size())
        continue;
      // Collect the clone numbers assigned to this node's call and clones.
      DenseSet<unsigned> NodeCallClones;
      for (auto *C : Node->Clones)
        NodeCallClones.insert(C->Call.cloneNo());
      // Walk through all function clones.
      unsigned I = 0;
      for (auto &FC : FuncCloneInfos) {
        // Skip the original function copy, and any function clone with a
        // corresponding callsite clone.
        if (++I == 1 || NodeCallClones.contains(I)) {
          continue;
        }
        // Record all calls in this function clone that were not assigned.
        auto &CallVector = UnassignedCallClones[Node][I];
        DenseMap<CallInfo, CallInfo> &CallMap = FC.CallMap;
        if (auto It = CallMap.find(Call); It != CallMap.end()) {
          CallInfo CallClone = It->second;
          CallVector.push_back(CallClone);
        } else {
          // All calls should have been recorded in the CallMap.
          assert(false && "Expected to find call in CallMap");
        }
        // Need to do the same for all matching calls.
        for (auto &MatchingCall : Node->MatchingCalls) {
          if (auto It = CallMap.find(MatchingCall); It != CallMap.end()) {
            CallInfo CallClone = It->second;
            CallVector.push_back(CallClone);
          } else {
            // All calls should have been recorded in the CallMap.
            assert(false && "Expected to find call in CallMap");
          }
        }
      }
    }
  }
  uint8_t BothTypes =
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;

  auto UpdateCalls = [&](ContextNode *Node,
                         DenseSet<const ContextNode *> &Visited,
                         auto &&UpdateCalls) {
    auto Inserted = Visited.insert(Node);
    if (!Inserted.second)
      return;

    for (auto *Clone : Node->Clones)
      UpdateCalls(Clone, Visited, UpdateCalls);

    for (auto &Edge : Node->CallerEdges)
      UpdateCalls(Edge->Caller, Visited, UpdateCalls);

    // Skip if either no call to update, or if we ended up with no context ids
    // (we moved all edges onto other clones).
    if (!Node->hasCall() || Node->emptyContextIds())
      return;

    if (Node->IsAllocation) {
      auto AT = allocTypeToUse(Node->AllocTypes);
      // If the allocation type is ambiguous, and more aggressive hinting is
      // enabled via the cold byte percent threshold, see if this allocation
      // should be hinted cold anyway because its fraction of cold bytes is at
      // least the given threshold.
      if (Node->AllocTypes == BothTypes && MinClonedColdBytePercent < 100 &&
          !ContextIdToContextSizeInfos.empty()) {
        uint64_t TotalCold = 0;
        uint64_t Total = 0;
        for (auto Id : Node->getContextIds()) {
          auto TypeI = ContextIdToAllocationType.find(Id);
          assert(TypeI != ContextIdToAllocationType.end());
          auto CSI = ContextIdToContextSizeInfos.find(Id);
          if (CSI != ContextIdToContextSizeInfos.end()) {
            for (auto &Info : CSI->second) {
              Total += Info.TotalSize;
              if (TypeI->second == AllocationType::Cold)
                TotalCold += Info.TotalSize;
            }
          }
        }
        if (TotalCold * 100 >= Total * MinClonedColdBytePercent)
          AT = AllocationType::Cold;
      }
      updateAllocationCall(Node->Call, AT);
      assert(Node->MatchingCalls.empty());
      return;
    }

    if (!CallsiteToCalleeFuncCloneMap.count(Node))
      return;

    auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
    updateCall(Node->Call, CalleeFunc);
    // Update all the matching calls as well.
    for (auto &Call : Node->MatchingCalls)
      updateCall(Call, CalleeFunc);

    // Update all calls recorded earlier that are in function clones without a
    // callsite clone for this node.
    if (!UnassignedCallClones.contains(Node))
      return;
    // The node may have been given additional clone numbers since the
    // UnassignedCallClones map was recorded; skip those.
    DenseSet<unsigned> NodeCallClones;
    for (auto *C : Node->Clones)
      NodeCallClones.insert(C->Call.cloneNo());
    auto &ClonedCalls = UnassignedCallClones[Node];
    for (auto &[CloneNo, CallVector] : ClonedCalls) {
      // Skip if a callsite clone was subsequently assigned to this function
      // clone.
      if (NodeCallClones.contains(CloneNo))
        continue;
      // Update all the calls recorded for this function clone.
      for (auto &Call : CallVector)
        updateCall(Call, CalleeFunc);
    }
  };

  // Performs a DFS traversal starting from the allocation nodes to update
  // calls to reflect the cloning decisions recorded above. For regular LTO
  // this updates the actual calls in the IR to call the appropriate function
  // clone (and adds attributes to allocation calls), whereas for ThinLTO the
  // decisions are recorded in the summary entries.
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap)
    UpdateCalls(Entry.second, Visited, UpdateCalls);

  return Changed;
}
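// assignFunctions() is where callsite-clone decisions become concrete
// function versions: each callsite clone is greedily packed into the first
// compatible function clone, new function clones are created only when two
// callers require conflicting combinations of callsite clones within the same
// function, and the final UpdateCalls traversal rewrites calls (regular LTO)
// or summary entries (ThinLTO) to match.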
// Hash the clone assignments recorded in the summary for function clone I,
// used to detect function clones that would be identical.
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) {
  // ... (hash setup)
  for (auto &SN : FS->callsites()) {
    assert(SN.Clones.size() > I &&
           "Callsite summary has fewer entries than other summaries in function");
    if (SN.Clones.size() <= I || !SN.Clones[I])
      continue;
    // ... (mix SN.Clones[I] into the hash)
  }
  for (auto &AN : FS->allocs()) {
    assert(AN.Versions.size() > I &&
           "Alloc summary has fewer entries than other summaries in function");
    if (AN.Versions.size() <= I || !AN.Versions[I])
      continue;
    // ... (mix AN.Versions[I] into the hash)
  }
  // ... (return the final hash value)
}
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4>
createFunctionClones(Function &F, unsigned NumClones, Module &M,
                     OptimizationRemarkEmitter &ORE,
                     std::map<const Function *,
                              SmallPtrSet<const GlobalAlias *, 1>>
                         &FuncToAliasMap,
                     FunctionSummary *FS) {
  // The first "clone" is the original copy; only called when clones needed.
  assert(NumClones > 1);
  SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
  VMaps.reserve(NumClones - 1);

  // If there is already a declaration with the name we are about to use for a
  // clone, take that name and replace the declaration's uses with the new
  // global.
  auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
    // We might have created this declaration when adjusting a callsite in
    // another function; it should not be a definition.
    assert(DeclGV->isDeclaration());
    NewGV->takeName(DeclGV);
    DeclGV->replaceAllUsesWith(NewGV);
    DeclGV->eraseFromParent();
  };

  // Clone any aliases of F as well, pointing them at the given function clone.
  auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
    if (!FuncToAliasMap.count(&F))
      return;
    for (auto *A : FuncToAliasMap[&F]) {
      std::string AliasName = getMemProfFuncName(A->getName(), I);
      auto *PrevA = M.getNamedAlias(AliasName);
      auto *NewA = GlobalAlias::create(A->getValueType(),
                                       A->getType()->getPointerAddressSpace(),
                                       A->getLinkage(), AliasName, NewF);
      NewA->copyAttributesFrom(A);
      if (PrevA)
        TakeDeclNameAndReplace(PrevA, NewA);
    }
  };

  FunctionsClonedThinBackend++;
  // Map from the hash of the summary clone assignments to the first function
  // clone created with those assignments, to detect duplicate clones.
  DenseMap<uint64_t, Function *> HashToFunc;
  // ...
  for (unsigned I = 1; I < NumClones; I++) {
    VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
    std::string Name = getMemProfFuncName(F.getName(), I);
    auto Hash = ComputeHash(FS, I);
    // If this clone would be identical to a prior one, emit an alias (or a
    // declaration) instead of a new function body.
    if (HashToFunc.contains(Hash)) {
      FunctionCloneDuplicatesThinBackend++;
      auto *Func = HashToFunc[Hash];
      if (Func->hasAvailableExternallyLinkage()) {
        // We cannot alias an available_externally function, which may be
        // dropped later; create a declaration instead.
        auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
                 << "created clone decl " << ore::NV("Decl", Decl.getCallee()));
        continue;
      }
      auto *PrevF = M.getFunction(Name);
      auto *Alias = GlobalAlias::create(Name, Func);
      if (PrevF)
        TakeDeclNameAndReplace(PrevF, Alias);
      ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
               << "created clone alias " << ore::NV("Alias", Alias));
      // Now handle aliases to this function, and clone those as well.
      CloneFuncAliases(Func, I);
      continue;
    }
    auto *NewF = CloneFunction(&F, *VMaps.back());
    HashToFunc[Hash] = NewF;
    FunctionClonesThinBackend++;
    // Strip memprof and callsite metadata from the clone as they are no
    // longer needed.
    for (auto &BB : *NewF) {
      for (auto &Inst : BB) {
        Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
        Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
      }
    }
    auto *PrevF = M.getFunction(Name);
    if (PrevF)
      TakeDeclNameAndReplace(PrevF, NewF);
    else
      NewF->setName(Name);
    updateSubprogramLinkageName(NewF, Name);
    ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
             << "created clone " << ore::NV("NewFunction", NewF));
    // Now handle aliases to this function, and clone those as well.
    CloneFuncAliases(NewF, I);
  }
  return VMaps;
}
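// Duplicate detection rationale: two function clones whose summaries assign
// identical callee-clone numbers and allocation types would be byte-for-byte
// identical after cloning. Rather than materializing a second body, the code
// above emits an alias to the first clone with the matching hash, or a bare
// declaration when aliasing an available_externally definition would be
// unsafe (tracked by FunctionCloneDuplicatesThinBackend).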
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
                                      const ModuleSummaryIndex *ImportSummary,
                                      const Function *CallingFunc = nullptr) {
  ValueInfo TheFnVI;
  StringRef OrigName = F.getName();
  // ...
  auto SrcFileMD = F.getMetadata("thinlto_src_file");
  // If this is an imported local that was promoted and renamed, the
  // thinlto_src_file metadata may only be present on the calling function.
  if (!SrcFileMD && F.isDeclaration()) {
    // ...
    if (CallingFunc)
      SrcFileMD = CallingFunc->getMetadata("thinlto_src_file");
    // ...
  }
  assert(SrcFileMD || OrigName == F.getName());
  StringRef SrcFile = M.getSourceFileName();
  // ... (compute the global identifier from OrigName/SrcFile and look up
  //      TheFnVI in the ImportSummary)
  // If we still didn't find the value info, and this is a promoted local
  // (which has a '.' in the name), strip off the suffix and retry.
  if (!TheFnVI && OrigName == F.getName() && F.hasLocalLinkage() &&
      F.getName().contains('.')) {
    OrigName = F.getName().rsplit('.').first;
    // ... (retry the lookup with the stripped name)
  }
  assert(TheFnVI || F.isDeclaration());
  return TheFnVI;
}
bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
    Module &M) {
  ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
  Symtab = std::make_unique<InstrProfSymtab>();
  // Don't add canonical names, to avoid multiple functions landing in the
  // symtab when they share the same root name with "." suffixes stripped;
  // picking the wrong one could lead to calling a memprof clone that we never
  // actually create (resulting in linker unsats).
  if (Error E = Symtab->create(M, /*InLTO=*/true, /*AddCanonical=*/false)) {
    std::string SymtabFailure = toString(std::move(E));
    M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
    return false;
  }
  return true;
}
static void checkAllocContextIds(
    const AllocInfo &AllocNode, const MDNode *MemProfMD,
    const CallStack<MDNode, MDNode::op_iterator> &CallsiteContext,
    const ModuleSummaryIndex *ImportSummary) {
  auto MIBIter = AllocNode.MIBs.begin();
  for (auto &MDOp : MemProfMD->operands()) {
    assert(MIBIter != AllocNode.MIBs.end());
    auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
    // ... (get the MIB's stack node and build its CallStack)
    auto ContextIterBegin =
        StackContext.beginAfterSharedPrefix(CallsiteContext);
    // Skip the checking on the first iteration.
    uint64_t LastStackContextId =
        (ContextIterBegin != StackContext.end() && *ContextIterBegin == 0) ? 1
                                                                           : 0;
    for (auto ContextIter = ContextIterBegin;
         ContextIter != StackContext.end(); ++ContextIter) {
      // If this is a direct recursion, simply skip the duplicate entries, to
      // be consistent with how the summary ids were computed.
      if (LastStackContextId == *ContextIter)
        continue;
      LastStackContextId = *ContextIter;
      assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
      assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
             *ContextIter);
      StackIdIndexIter++;
    }
    MIBIter++;
  }
}
bool MemProfContextDisambiguation::applyImport(Module &M) {
  assert(ImportSummary);
  bool Changed = false;

  // We also need to clone any aliases that reference cloned functions,
  // because the modified callsites may invoke via the alias. Keep track of
  // the aliases for each function.
  std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
      FuncToAliasMap;
  for (auto &A : M.aliases()) {
    auto *Aliasee = A.getAliaseeObject();
    if (auto *F = dyn_cast<Function>(Aliasee))
      FuncToAliasMap[F].insert(&A);
  }

  if (!initializeIndirectCallPromotionInfo(M))
    return false;

  for (auto &F : M) {
    if (F.isDeclaration() || isMemProfClone(F))
      continue;

    OptimizationRemarkEmitter ORE(&F);

    SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
    bool ClonesCreated = false;
    unsigned NumClonesCreated = 0;
    auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
      // We should at least have version 0, which is the original copy.
      assert(NumClones > 0);
      // If only one copy is needed, use the original.
      if (NumClones == 1)
        return;
      // If we already performed cloning of this function, confirm that the
      // requested number of clones matches (the thin link should ensure the
      // number of clones for each constituent callsite is consistent within
      // each function).
      if (ClonesCreated) {
        assert(NumClonesCreated == NumClones);
        return;
      }
      VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
      // The first "clone" is the original copy, which doesn't have a VMap.
      assert(VMaps.size() == NumClones - 1);
      Changed = true;
      ClonesCreated = true;
      NumClonesCreated = NumClones;
    };

    auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
                             Function *CalledFunction, FunctionSummary *FS) {
      // Perform cloning if not yet done.
      CloneFuncIfNeeded(StackNode.Clones.size(), FS);

      assert(!isMemProfClone(*CalledFunction));

      // Because we update the cloned calls by setting the called operand, out
      // of an abundance of caution make sure the called function was actually
      // the called operand (or its aliasee).
      auto *GA = dyn_cast_or_null<GlobalAlias>(CB->getCalledOperand());
      if (CalledFunction != CB->getCalledOperand() &&
          (!GA || CalledFunction != GA->getAliaseeObject())) {
        SkippedCallsCloning++;
        return;
      }
      // Update the clones of the call to call the correct function clones.
      auto CalleeOrigName = CalledFunction->getName();
      for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
        // Skip if no clone was created for this copy (e.g. it was detected as
        // a duplicate and emitted as an alias).
        if (J > 0 && VMaps[J - 1]->empty())
          continue;
        // Do nothing if this version calls the original version of its
        // callee.
        if (!StackNode.Clones[J])
          continue;
        auto NewF = M.getOrInsertFunction(
            getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
            CalledFunction->getFunctionType());
        CallBase *CBClone;
        // Copy 0 is the original function.
        if (!J)
          CBClone = CB;
        else
          CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
        // Set the called operand directly rather than calling
        // setCalledFunction, which can mutate the call's function type when
        // it doesn't match the callee's.
        CBClone->setCalledOperand(NewF.getCallee());
        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
                 << ore::NV("Call", CBClone) << " in clone "
                 << ore::NV("Caller", CBClone->getFunction())
                 << " assigned to call function clone "
                 << ore::NV("Callee", NewF.getCallee()));
      }
    };

    // Locate the summary for F.
    ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
    // If not found, this could be an imported local (see comments in
    // findValueInfoForFunc); skip it.
    if (!TheFnVI)
      continue;

    auto *GVSummary =
        ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
    if (!GVSummary) {
      // Must have been imported; use the summary which matches the definition
      // (there might be multiple if this was a linkonce_odr).
      auto SrcModuleMD = F.getMetadata("thinlto_src_module");
      assert(SrcModuleMD &&
             "enable-import-metadata is needed to emit thinlto_src_module");
      StringRef SrcModule =
          dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
      for (auto &GVS : TheFnVI.getSummaryList()) {
        if (GVS->modulePath() == SrcModule) {
          GVSummary = GVS.get();
          break;
        }
      }
      assert(GVSummary && GVSummary->modulePath() == SrcModule);
    }
    // If this summary has no allocations or callsites, there is nothing to
    // process for this function.
    auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());
    if (FS->allocs().empty() && FS->callsites().empty())
      continue;

    auto SI = FS->callsites().begin();
    auto AI = FS->allocs().begin();

    // To handle callsite infos synthesized for tail calls which have missing
    // frames in the profiled context, map callee VI to the synthesized
    // callsite info.
    DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
    // Iterate the callsites for this function in reverse, since we place all
    // those synthesized for tail calls at the end.
    for (auto CallsiteIt = FS->callsites().rbegin();
         CallsiteIt != FS->callsites().rend(); CallsiteIt++) {
      auto &Callsite = *CallsiteIt;
      // Stop as soon as we see a non-synthesized callsite info; all the
      // entries added for discovered tail calls have empty stack ids.
      if (!Callsite.StackIdIndices.empty())
        break;
      MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite});
    }

    // Keeps track of needed ICP for the function.
    SmallVector<ICallAnalysisData> ICallAnalysisInfo;

    // Assume for now that the instructions are in the exact same order as
    // when the summary was created, but confirm this is correct by matching
    // the stack ids.
    for (auto &BB : F) {
      for (auto &I : BB) {
        auto *CB = dyn_cast<CallBase>(&I);
        // Same handling as when creating the module summary.
        if (!mayHaveMemprofSummary(CB))
          continue;

        auto *CalledValue = CB->getCalledOperand();
        auto *CalledFunction = CB->getCalledFunction();
        if (CalledValue && !CalledFunction) {
          CalledValue = CalledValue->stripPointerCasts();
          // Stripping pointer casts can reveal a called function.
          CalledFunction = dyn_cast<Function>(CalledValue);
        }
        // Check if this is an alias to a function. If so, get the called
        // aliasee for the checks below.
        if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
          assert(!CalledFunction &&
                 "Expected null called function in callsite for alias");
          CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
        }

        CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
            I.getMetadata(LLVMContext::MD_callsite));
        auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);

        // Include allocs that were already assigned a memprof function
        // attribute in the statistics.
        if (CB->getAttributes().hasFnAttr("memprof") && !MemProfMD) {
          CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
              ? AllocTypeColdThinBackend++
              : AllocTypeNotColdThinBackend++;
          OrigAllocsThinBackend++;
          AllocVersionsThinBackend++;
          if (!MaxAllocVersionsThinBackend)
            MaxAllocVersionsThinBackend = 1;
          continue;
        }

        if (MemProfMD) {
          // Consult the next alloc node.
          assert(AI != FS->allocs().end());
          auto &AllocNode = *(AI++);

          // Sanity check that the MIB stack ids match between the summary and
          // instruction metadata.
          LLVM_DEBUG(checkAllocContextIds(AllocNode, MemProfMD,
                                          CallsiteContext, ImportSummary));

          // Perform cloning if not yet done.
          CloneFuncIfNeeded(AllocNode.Versions.size(), FS);

          OrigAllocsThinBackend++;
          AllocVersionsThinBackend += AllocNode.Versions.size();
          if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
            MaxAllocVersionsThinBackend = AllocNode.Versions.size();

          // If there is only one version, we didn't end up considering this
          // function for cloning; the alloc will still be None type or have
          // gotten the default NotCold.
          if (AllocNode.Versions.size() == 1 &&
              (AllocationType)AllocNode.Versions[0] != AllocationType::Cold) {
            assert((AllocationType)AllocNode.Versions[0] ==
                       AllocationType::NotCold ||
                   (AllocationType)AllocNode.Versions[0] ==
                       AllocationType::None);
            UnclonableAllocsThinBackend++;
            continue;
          }

          // All versions should have a singular allocation type.
          assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) {
            return Type == ((uint8_t)AllocationType::NotCold |
                            (uint8_t)AllocationType::Cold);
          }));

          // Update the allocation types per the summary info.
          for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
            // Skip if no clone was created for this copy.
            if (J > 0 && VMaps[J - 1]->empty())
              continue;
            // Ignore any that didn't get an assigned allocation type.
            if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
              continue;
            AllocationType AllocTy = (AllocationType)AllocNode.Versions[J];
            AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
                                            : AllocTypeNotColdThinBackend++;
            std::string AllocTypeString = getAllocTypeAttributeString(AllocTy);
            auto A = llvm::Attribute::get(F.getContext(), "memprof",
                                          AllocTypeString);
            CallBase *CBClone;
            // Copy 0 is the original function.
            if (!J)
              CBClone = CB;
            else
              CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
            CBClone->addFnAttr(A);
            ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute",
                                        CBClone)
                     << ore::NV("AllocationCall", CBClone) << " in clone "
                     << ore::NV("Caller", CBClone->getFunction())
                     << " marked with memprof allocation attribute "
                     << ore::NV("Attribute", AllocTypeString));
          }
        } else if (!CallsiteContext.empty()) {
          if (!CalledFunction) {
            // This is an indirect call; see if we have profile information
            // and whether any clones were recorded for the profiled targets
            // (we synthesized CallsiteInfo summary records for those when
            // building the index).
            auto *CI = dyn_cast<CallInst>(CB);
            // We should have skipped inline assembly calls.
            assert(!CI || !CI->isInlineAsm());
            // We should have skipped direct calls via a Constant.
            assert(CalledValue && !isa<Constant>(CalledValue));

            auto NumClones =
                recordICPInfo(CB, FS->callsites(), SI, ICallAnalysisInfo);

            // Perform cloning if not yet done. This is done here in case we
            // don't need to do ICP but might still need to clone this
            // function, as it may be the target of other cloned calls.
            if (NumClones)
              CloneFuncIfNeeded(NumClones, FS);
          } else {
            // Consult the next callsite node.
            assert(SI != FS->callsites().end());
            auto &StackNode = *(SI++);

#ifndef NDEBUG
            // Sanity check that the stack ids match between the summary and
            // instruction metadata.
            auto StackIdIndexIter = StackNode.StackIdIndices.begin();
            for (auto StackId : CallsiteContext) {
              assert(StackIdIndexIter != StackNode.StackIdIndices.end());
              assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
                     StackId);
              StackIdIndexIter++;
            }
#endif

            CloneCallsite(StackNode, CB, CalledFunction, FS);
          }
        } else if (CB->isTailCall() && CalledFunction) {
          ValueInfo CalleeVI =
              findValueInfoForFunc(*CalledFunction, M, ImportSummary, &F);
          if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
            auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
            assert(Callsite != MapTailCallCalleeVIToCallsite.end());
            CloneCallsite(Callsite->second, CB, CalledFunction, FS);
          }
        }
      }
    }

    // Now do any promotion required for cloning.
    performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
  }

  // We skip some of the functions and instructions above, so remove all the
  // metadata in a single sweep here.
  for (auto &F : M) {
    // We can skip memprof clones because createFunctionClones already strips
    // the metadata from the newly created clones.
    if (F.isDeclaration() || isMemProfClone(F))
      continue;
    for (auto &BB : F) {
      for (auto &I : BB) {
        if (!isa<CallBase>(I))
          continue;
        I.setMetadata(LLVMContext::MD_memprof, nullptr);
        I.setMetadata(LLVMContext::MD_callsite, nullptr);
      }
    }
  }

  return Changed;
}
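// applyImport() is the ThinLTO-backend mirror of assignFunctions(): the thin
// link has already recorded, per summary entry, how many versions each
// function needs (the Versions/Clones arrays), so the backend only has to
// walk the IR in lockstep with the summary, clone function bodies on demand
// via CloneFuncIfNeeded, attach "memprof" attributes to allocation clones,
// and retarget cloned callsites with CloneCallsite.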
unsigned MemProfContextDisambiguation::recordICPInfo(
    CallBase *CB, ArrayRef<CallsiteInfo> AllCallsites,
    ArrayRef<CallsiteInfo>::iterator &SI,
    SmallVector<ICallAnalysisData> &ICallAnalysisInfo) {
  // First see if we have profile information for this indirect call.
  uint32_t NumCandidates;
  uint64_t TotalCount;
  auto CandidateProfileData =
      ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
                                                          NumCandidates);
  if (CandidateProfileData.empty())
    return 0;

  // Iterate through all of the candidate profiled targets along with the
  // CallsiteInfo summary records synthesized for them when building the
  // index, and see if any are cloned and/or refer to clones.
  bool ICPNeeded = false;
  unsigned NumClones = 0;
  size_t CallsiteInfoStartIndex = std::distance(AllCallsites.begin(), SI);
  for (const auto &Candidate : CandidateProfileData) {
#ifndef NDEBUG
    auto CalleeValueInfo =
#endif
        ImportSummary->getValueInfo(Candidate.Value);
    // We might not have a ValueInfo if this is a distributed ThinLTO backend
    // that decided not to import that function.
    assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
    assert(SI != AllCallsites.end());
    auto &StackNode = *(SI++);
    // Only need to do the ICP here if any clone of the indirect callsite
    // should call a cloned version of the profiled target.
    ICPNeeded |= llvm::any_of(StackNode.Clones,
                              [](unsigned CloneNo) { return CloneNo != 0; });
    // Every callsite in the same function should have been cloned the same
    // number of times.
    assert(!NumClones || NumClones == StackNode.Clones.size());
    NumClones = StackNode.Clones.size();
  }
  if (!ICPNeeded)
    return NumClones;
  // Save information for ICP, which is performed later to avoid messing up
  // the current function traversal.
  ICallAnalysisInfo.push_back({CB, CandidateProfileData.vec(), NumCandidates,
                               TotalCount, CallsiteInfoStartIndex});
  return NumClones;
}
void MemProfContextDisambiguation::performICP(
    Module &M, ArrayRef<CallsiteInfo> AllCallsites,
    ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
    ArrayRef<ICallAnalysisData> ICallAnalysisInfo,
    OptimizationRemarkEmitter &ORE) {
  // Now do any promotion required for cloning: for each recorded ICP
  // candidate, promote the call in each clone of the callsite and update the
  // resulting direct call to target the appropriate function clone. Note that
  // the ICP compares against the original version of the called function,
  // which is what is in the vtable.
  for (auto &Info : ICallAnalysisInfo) {
    auto *CB = Info.CB;
    auto CallsiteIndex = Info.CallsiteInfoStartIndex;
    auto TotalCount = Info.TotalCount;
    unsigned NumPromoted = 0;
    unsigned NumClones = 0;

    for (auto &Candidate : Info.CandidateProfileData) {
      auto &StackNode = AllCallsites[CallsiteIndex++];

      // All calls in the same function must have the same number of clones.
      assert(!NumClones || NumClones == StackNode.Clones.size());
      NumClones = StackNode.Clones.size();

      // See if the target is in the module. If it wasn't imported, this
      // profile could have been collected on a different binary, and we need
      // to be conservative (similar to the ICP pass itself).
      Function *TargetFunction = Symtab->getFunction(Candidate.Value);
      if (TargetFunction == nullptr ||
          // Any ThinLTO global dead symbol removal should have already
          // occurred, so it should be safe to promote when the target is a
          // declaration (whereas it would not be safe to promote to an
          // available_externally definition that could be deleted later).
          TargetFunction->isDeclaration()) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", CB)
                 << "Memprof cannot promote indirect call: target with md5sum "
                 << ore::NV("target md5sum", Candidate.Value) << " not found";
        });
        continue;
      }

      // Check if it is legal to promote.
      const char *Reason = nullptr;
      if (!isLegalToPromote(*CB, TargetFunction, &Reason)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", CB)
                 << "Memprof cannot promote indirect call to "
                 << ore::NV("TargetFunction", TargetFunction)
                 << " with count of " << ore::NV("TotalCount", TotalCount)
                 << ": " << Reason;
        });
        continue;
      }

      assert(!isMemProfClone(*TargetFunction));

      // Handle each clone of the callsite: promote the call in that clone,
      // then update the promoted direct call to call the appropriate function
      // clone per the summary.
      CallBase *CBClone = CB;
      for (unsigned J = 0; J < NumClones; J++) {
        // Skip if no clone was created for this copy.
        if (J > 0 && VMaps[J - 1]->empty())
          continue;
        // Copy 0 is the original function.
        if (J > 0)
          CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
        // Promote using the original name, so that the comparison is against
        // the name in the vtable. Just below, the new direct call is changed
        // to call the cloned function.
        auto &DirectCall =
            pgo::promoteIndirectCall(*CBClone, TargetFunction, Candidate.Count,
                                     TotalCount, isSamplePGO, &ORE);
        auto *TargetToUse = TargetFunction;
        // Call the original if this version calls the original version of its
        // callee (clone number 0).
        if (StackNode.Clones[J]) {
          TargetToUse = cast<Function>(
              M.getOrInsertFunction(
                   getMemProfFuncName(TargetFunction->getName(),
                                      StackNode.Clones[J]),
                   TargetFunction->getFunctionType())
                  .getCallee());
        }
        DirectCall.setCalledFunction(TargetToUse);
        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
                 << ore::NV("Call", CBClone) << " in clone "
                 << ore::NV("Caller", CBClone->getFunction())
                 << " promoted and assigned to call function clone "
                 << ore::NV("Callee", TargetToUse));
      }

      // Update TotalCount (all clones should get the same count above).
      TotalCount -= Candidate.Count;
      NumPromoted++;
    }
    // Adjust the !prof metadata on all clones, now that we have the new
    // TotalCount and the number of promoted targets.
    CallBase *CBClone = CB;
    for (unsigned J = 0; J < NumClones; J++) {
      // Skip if no clone was created for this copy.
      if (J > 0 && VMaps[J - 1]->empty())
        continue;
      // Copy 0 is the original function.
      if (J > 0)
        CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
      // First delete the old metadata.
      CBClone->setMetadata(LLVMContext::MD_prof, nullptr);
      // If all targets were promoted, no !prof metadata is needed; otherwise
      // re-attach the un-promoted records.
      if (TotalCount != 0)
        annotateValueSite(
            M, *CBClone,
            ArrayRef(Info.CandidateProfileData).slice(NumPromoted),
            TotalCount, IPVK_IndirectCallTarget, Info.NumCandidates);
    }
  }
}
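// The promotion above reuses the generic pgo::promoteIndirectCall machinery:
// the indirect call is first promoted against the original target, since that
// is the symbol the vtable or function pointer actually references, and only
// the resulting direct call is then retargeted to the ".memprof.<N>" clone
// chosen by the summary. Remaining unpromoted candidate counts are re-attached
// as value profile metadata so later ICP passes see a consistent profile.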
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
  if (DumpCCG) {
    dbgs() << "CCG before cloning:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("postbuild");

  if (VerifyCCG)
    check();

  markBackedges();

  identifyClones();

  if (VerifyCCG)
    check();

  if (DumpCCG) {
    dbgs() << "CCG after cloning:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("cloned");

  mergeClones();

  bool Changed = assignFunctions();

  if (DumpCCG) {
    dbgs() << "CCG after assigning function clones:\n";
    dbgs() << *this;
  }
  if (ExportToDot)
    exportToDot("clonefuncassign");

  if (MemProfReportHintedSizes)
    printTotalSizes(errs());

  return Changed;
}
bool MemProfContextDisambiguation::processModule(
    Module &M,
    llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {

  // If we have an import summary, then the cloning decisions were made during
  // the thin link on the index. Apply them and return.
  if (ImportSummary)
    return applyImport(M);

  // ...
  ModuleCallsiteContextGraph CCG(M, OREGetter);
  return CCG.process();
}

MemProfContextDisambiguation::MemProfContextDisambiguation(
    const ModuleSummaryIndex *Summary, bool isSamplePGO)
    : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
  // Enforce the constraints on the dot-export debugging options.
  if (DotGraphScope == DotScope::Alloc && !AllocIdForDot.getNumOccurrences())
    llvm::report_fatal_error(
        "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
  if (DotGraphScope == DotScope::Context &&
      !ContextIdForDot.getNumOccurrences())
    llvm::report_fatal_error(
        "-memprof-dot-scope=context requires -memprof-dot-context-id");
  if (DotGraphScope == DotScope::All && AllocIdForDot.getNumOccurrences() &&
      ContextIdForDot.getNumOccurrences())
    llvm::report_fatal_error(
        "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
        "-memprof-dot-context-id");
  if (ImportSummary) {
    // The MemProfImportSummary testing option should not be used together
    // with a real import summary from the pass pipeline.
    assert(MemProfImportSummary.empty());
    return;
  }
  if (MemProfImportSummary.empty())
    return;

  // Testing path (via opt): read the serialized summary index to use for the
  // ThinLTO backend handling.
  auto ReadSummaryFile =
      errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary));
  if (!ReadSummaryFile) {
    logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(),
                          "Error loading file '" + MemProfImportSummary +
                              "': ");
    return;
  }
  auto ImportSummaryForTestingOrErr =
      getModuleSummaryIndex(**ReadSummaryFile);
  if (!ImportSummaryForTestingOrErr) {
    logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(),
                          "Error parsing file '" + MemProfImportSummary +
                              "': ");
    return;
  }
  ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
  ImportSummary = ImportSummaryForTesting.get();
}

PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
                                                    ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };
  if (!processModule(M, OREGetter))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

void MemProfContextDisambiguation::run(
    ModuleSummaryIndex &Index,
    llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing) {
  // ...
  IndexCallsiteContextGraph CCG(Index, isPrevailing);
  CCG.process();
}
  // Strip any existing "memprof" attributes and memprof/callsite metadata
  // from the calls in this function, on paths where the annotations are no
  // longer needed or applicable.
  for (auto &BB : F) {
    for (auto &I : BB) {
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      if (CI->hasFnAttr("memprof")) {
        CI->removeFnAttr("memprof");
        // ...
      }
      if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
        assert(!CI->hasMetadata(LLVMContext::MD_memprof));
        continue;
      }
      // ...
      CI->setMetadata(LLVMContext::MD_memprof, nullptr);
      CI->setMetadata(LLVMContext::MD_callsite, nullptr);
    }
  }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations of the classes that hold the module index and summary for function importing.
if (auto Err = PB.parsePassPipeline(MPM, Passes))
  return wrap(std::move(Err));
MPM.run(*Mod, MAM);
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on sets of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
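A minimal sketch of how the STATISTIC macro is typically used, in the style of the counters at the top of this file; the counter name is made up, and DEBUG_TYPE must be defined before the macro is expanded:

#define DEBUG_TYPE "example-pass"
#include "llvm/ADT/Statistic.h"

STATISTIC(NumExampleFolds, "Number of example folds performed");

static void noteFold() {
  ++NumExampleFolds; // Behaves like an integer; totals print under -stats.
}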
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
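A minimal sketch showing these two attribute APIs together; the function name is illustrative:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

void tagFunction(llvm::Function &F) {
  // Attributes are uniqued per LLVMContext.
  llvm::Attribute A =
      llvm::Attribute::get(F.getContext(), llvm::Attribute::NoInline);
  (void)A;
  // Convenience form that builds and attaches the attribute in one step.
  F.addFnAttr(llvm::Attribute::NoInline);
}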
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the DenseMap so that it can contain at least NumEntries items before resizing again.
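A minimal sketch combining the DenseMap operations listed above; the key and value types are illustrative:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"

void denseMapDemo() {
  llvm::DenseMap<unsigned, llvm::StringRef> M;
  M.reserve(16);                   // Pre-size to avoid rehashing while filling.
  M.insert({1, "one"});            // Returns {iterator, inserted-flag}.
  llvm::StringRef S = M.lookup(2); // Default-constructed: key 2 is absent.
  bool Present = M.contains(1);    // Equivalent to M.count(1) == 1.
  auto It = M.find(1);             // end() when the key is missing.
  (void)S; (void)Present; (void)It;
}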
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
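A minimal sketch of how these two name helpers compose when computing a GUID for a possibly-local symbol; the wrapper function is hypothetical:

#include "llvm/IR/GlobalValue.h"
#include <cstdint>
#include <string>

uint64_t guidFor(llvm::StringRef Name,
                 llvm::GlobalValue::LinkageTypes Linkage,
                 llvm::StringRef FileName) {
  // Local symbols are qualified by file name before hashing.
  std::string Key =
      llvm::GlobalValue::getGlobalIdentifier(Name, Linkage, FileName);
  return llvm::GlobalValue::getGUIDAssumingExternalLinkage(Key);
}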
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
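A minimal sketch of the clone-then-reunique idiom these MDNode APIs support; the kind ID and the edit step are illustrative:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"

void reattachMD(llvm::Instruction &I, unsigned KindID) {
  if (llvm::MDNode *N = I.getMetadata(KindID)) {
    llvm::TempMDNode Tmp = N->clone();   // Temporary, distinct copy.
    // ... edit Tmp's operands here ...
    llvm::MDNode *Uniqued = llvm::MDNode::replaceWithUniqued(std::move(Tmp));
    I.setMetadata(KindID, Uniqued);      // Reattach the uniqued node.
  }
}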
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
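A minimal sketch of the run()/PreservedAnalyses shape shared by the module passes above; ExamplePass is a made-up name:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

struct ExamplePass : llvm::PassInfoMixin<ExamplePass> {
  llvm::PreservedAnalyses run(llvm::Module &M,
                              llvm::ModuleAnalysisManager &AM) {
    bool Changed = false;
    // ... transform M, setting Changed as appropriate ...
    return Changed ? llvm::PreservedAnalyses::none()
                   : llvm::PreservedAnalyses::all();
  }
};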
A class that wraps the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
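A minimal sketch of the streaming SHA1 API above; the helper name is illustrative:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/SHA1.h"
#include <array>
#include <cstdint>

std::array<uint8_t, 20> hashTwoChunks(llvm::ArrayRef<uint8_t> A,
                                      llvm::ArrayRef<uint8_t> B) {
  llvm::SHA1 Hasher;
  Hasher.update(A);       // Digest data incrementally...
  Hasher.update(B);
  return Hasher.result(); // ...then read the 160-bit digest.
}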
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
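A minimal sketch of the small-container APIs above, in the common visit-once worklist idiom; the element type is illustrative:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

void visitOnce(llvm::SmallVectorImpl<int *> &Roots) {
  llvm::SmallPtrSet<int *, 8> Seen;
  llvm::SmallVector<int *, 8> Worklist;
  Worklist.reserve(Roots.size());
  for (int *P : Roots)
    if (Seen.insert(P).second) // True only on first insertion.
      Worklist.push_back(P);   // Inline storage until it exceeds 8 entries.
}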
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
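A minimal sketch of the DenseSet operations above; the element type is illustrative:

#include "llvm/ADT/DenseSet.h"

void denseSetDemo(llvm::DenseSet<unsigned> &Other) {
  llvm::DenseSet<unsigned> S;
  S.reserve(8);
  S.insert(42);
  if (S.contains(42)) // count(42) would report 1 here.
    S.erase(42);      // Returns true when an element was removed.
  S.swap(Other);      // Constant-time exchange of contents.
}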
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
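A minimal sketch of an option built with cl::init and cl::values, in the style of the MemProf flags on this page; the flag name and enum are made up:

#include "llvm/Support/CommandLine.h"

enum class DemoScope { All, Alloc };

static llvm::cl::opt<DemoScope> DemoScopeOpt(
    "demo-scope", llvm::cl::desc("Scope of demo output"),
    llvm::cl::init(DemoScope::All), llvm::cl::Hidden,
    llvm::cl::values(
        clEnumValN(DemoScope::All, "all", "Export everything"),
        clEnumValN(DemoScope::Alloc, "alloc", "Export one allocation")));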
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
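A minimal sketch walking one MIB metadata node with the memprof helpers above; the header location is assumed and the function is illustrative:

#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/Metadata.h"
#include <string>

void inspectMIB(const llvm::MDNode *MIB) {
  llvm::memprof::AllocationType AT = llvm::memprof::getMIBAllocType(MIB);
  if (llvm::memprof::hasSingleAllocType(static_cast<uint8_t>(AT))) {
    // E.g. "cold" or "notcold", suitable for a memprof attribute.
    std::string Attr = llvm::memprof::getAllocTypeAttributeString(AT);
    (void)Attr;
  }
  if (llvm::MDNode *Stack = llvm::memprof::getMIBStackNode(MIB))
    (void)Stack; // Stack-id node describing this context.
}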
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
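A minimal sketch pairing the two promotion APIs above in the usual check-then-promote order; the namespaces and headers are assumed, and the counts would come from profile data:

#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

void tryPromote(llvm::CallBase &CB, llvm::Function *Callee,
                uint64_t Count, uint64_t TotalCount) {
  const char *Reason = nullptr;
  if (!llvm::isLegalToPromote(CB, Callee, &Reason))
    return; // Reason now points at a human-readable explanation.
  llvm::pgo::promoteIndirectCall(CB, Callee, Count, TotalCount,
                                 /*AttachProfToDirectCall=*/true,
                                 /*ORE=*/nullptr);
}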
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
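A minimal sketch combining MemoryBuffer::getFile with getModuleSummaryIndex, roughly the shape of loading a summary for testing (as -memprof-import-summary does); error handling is simplified:

#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

std::unique_ptr<llvm::ModuleSummaryIndex> loadIndex(const llvm::Twine &Path) {
  auto BufOr = llvm::MemoryBuffer::getFile(Path);
  if (!BufOr)
    return nullptr; // Real code would surface BufOr.getError().
  auto IndexOr = llvm::getModuleSummaryIndex(**BufOr);
  if (!IndexOr) {
    llvm::logAllUnhandledErrors(IndexOr.takeError(), llvm::errs());
    return nullptr;
  }
  return std::move(*IndexOr);
}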
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
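A minimal sketch of the generic set operations above; they accept any set-like container, with std::set used here for illustration:

#include "llvm/ADT/SetOperations.h"
#include <set>

void setOpsDemo(std::set<int> &A, const std::set<int> &B) {
  llvm::set_intersect(A, B);                       // A := A ^ B, in place.
  bool Grew = llvm::set_union(A, B);               // A := A u B; true if A changed.
  llvm::set_subtract(A, B);                        // A := A - B, in place.
  std::set<int> Diff = llvm::set_difference(A, B); // Fresh copy of A - B.
  bool Sub = llvm::set_is_subset(A, B);            // True iff A is contained in B.
  (void)Grew; (void)Diff; (void)Sub;
}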
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
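A minimal sketch of the casting utilities above applied to a Value; the checks are illustrative:

#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"

void castDemo(llvm::Value *V) {
  if (llvm::isa<llvm::CallBase>(V)) {         // Type test only.
    auto *CB = llvm::cast<llvm::CallBase>(V); // Asserts if the test would fail.
    (void)CB;
  }
  if (auto *CB = llvm::dyn_cast<llvm::CallBase>(V)) // nullptr on mismatch.
    (void)CB;
  if (auto *CB = llvm::dyn_cast_or_null<llvm::CallBase>(V)) // Also tolerates null V.
    (void)CB;
}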
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
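A minimal sketch of the range-based helpers scattered through this list (stable_sort, any_of, none_of, find_if, is_contained, enumerate, drop_begin); the container and predicates are illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

void rangeDemo(llvm::SmallVectorImpl<int> &V) {
  llvm::stable_sort(V);
  bool AnyNeg = llvm::any_of(V, [](int X) { return X < 0; });
  bool NoZero = llvm::none_of(V, [](int X) { return X == 0; });
  auto It = llvm::find_if(V, [](int X) { return X > 10; });
  bool HasSeven = llvm::is_contained(V, 7);
  for (const auto &E : llvm::enumerate(llvm::drop_begin(V)))
    (void)E.index(), (void)E.value(); // (index, element) pairs, skipping V[0].
  (void)AnyNeg; (void)NoZero; (void)It; (void)HasSeven;
}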
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
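A minimal sketch of CloneFunction with a ValueToValueMapTy, the primitive underneath createFunctionClones above; the suffix is made up (the real clones are named via MemProfCloneSuffix):

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"

llvm::Function *cloneOne(llvm::Function *F) {
  llvm::ValueToValueMapTy VMap; // Filled in with old-value -> clone mappings.
  llvm::Function *NewF = llvm::CloneFunction(F, VMap);
  NewF->setName(F->getName() + ".demo.clone"); // Hypothetical suffix.
  return NewF;
}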
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
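A minimal self-contained skeleton in the shape of the GraphTraits/DOTGraphTraits specializations above; DemoNode and DemoGraph are made-up stand-ins for the context graph (WriteGraph would additionally require nodes_begin/nodes_end):

#include "llvm/ADT/GraphTraits.h"
#include "llvm/Support/DOTGraphTraits.h"
#include <string>
#include <vector>

struct DemoNode { std::string Name; std::vector<DemoNode *> Succs; };
struct DemoGraph { DemoNode *Entry; };

namespace llvm {
template <> struct GraphTraits<const DemoGraph *> {
  using NodeRef = const DemoNode *;
  using ChildIteratorType = std::vector<DemoNode *>::const_iterator;
  static NodeRef getEntryNode(const DemoGraph *G) { return G->Entry; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
template <> struct DOTGraphTraits<const DemoGraph *> : DefaultDOTGraphTraits {
  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
  static std::string getNodeLabel(const DemoNode *N, const DemoGraph *) {
    return N->Name; // Text rendered inside each dot node.
  }
};
} // namespace llvm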
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...