52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
115 "Number of aliasees prevailing in a different module than its alias");
120 cl::desc(
"Specify the path prefix of the MemProf dot files."));
124 cl::desc(
"Export graph to dot files."));
129 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
139 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
144 "Export only nodes with contexts feeding given "
145 "-memprof-dot-alloc-id"),
147 "Export only nodes with given -memprof-dot-context-id")));
151 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
152 "or to highlight if -memprof-dot-scope=all"));
156 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
157 "highlight otherwise"));
161 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
165 cl::desc(
"Perform verification checks on CallingContextGraph."));
169 cl::desc(
"Perform frequent verification checks on nodes."));
172 "memprof-import-summary",
173 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
179 cl::desc(
"Max depth to recursively search for missing "
180 "frames through tail calls."));
185 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
189 cl::desc(
"Allow cloning of contexts through recursive cycles"));
196 cl::desc(
"Merge clones before assigning functions"));
205 cl::desc(
"Allow cloning of contexts having recursive cycles"));
211 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
222 cl::desc(
"Linking with hot/cold operator new interfaces"));
227 "Require target function definition when promoting indirect calls"));
234 cl::desc(
"Number of largest cold contexts to consider important"));
238 cl::desc(
"Enables edge fixup for important contexts"));
260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
261class CallsiteContextGraph {
263 CallsiteContextGraph() =
default;
264 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
265 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
272 void identifyClones();
279 bool assignFunctions();
286 const CallsiteContextGraph &CCG) {
292 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
294 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
296 void exportToDot(std::string Label)
const;
299 struct FuncInfo final
300 :
public std::pair<FuncTy *, unsigned > {
301 using Base = std::pair<FuncTy *, unsigned>;
303 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
304 explicit operator bool()
const {
return this->first !=
nullptr; }
305 FuncTy *func()
const {
return this->first; }
306 unsigned cloneNo()
const {
return this->second; }
310 struct CallInfo final :
public std::pair<CallTy, unsigned > {
311 using Base = std::pair<CallTy, unsigned>;
313 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
315 explicit operator bool()
const {
return (
bool)this->first; }
316 CallTy call()
const {
return this->first; }
317 unsigned cloneNo()
const {
return this->second; }
318 void setCloneNo(
unsigned N) { this->second =
N; }
320 if (!
operator bool()) {
326 OS <<
"\t(clone " << cloneNo() <<
")";
352 bool Recursive =
false;
379 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
383 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
387 bool useCallerEdgesForContextInfo()
const {
392 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
410 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
411 Count += Edge->getContextIds().size();
415 CalleeEdges, useCallerEdgesForContextInfo()
417 : std::vector<std::shared_ptr<ContextEdge>>());
418 for (
const auto &Edge : Edges)
425 uint8_t computeAllocType()
const {
430 CalleeEdges, useCallerEdgesForContextInfo()
432 : std::vector<std::shared_ptr<ContextEdge>>());
433 for (
const auto &Edge : Edges) {
444 bool emptyContextIds()
const {
446 CalleeEdges, useCallerEdgesForContextInfo()
448 : std::vector<std::shared_ptr<ContextEdge>>());
449 for (
const auto &Edge : Edges) {
450 if (!Edge->getContextIds().empty())
457 std::vector<ContextNode *> Clones;
460 ContextNode *CloneOf =
nullptr;
462 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
464 ContextNode(
bool IsAllocation, CallInfo
C)
465 : IsAllocation(IsAllocation),
Call(
C) {}
467 void addClone(ContextNode *Clone) {
469 CloneOf->Clones.push_back(Clone);
470 Clone->CloneOf = CloneOf;
472 Clones.push_back(Clone);
474 Clone->CloneOf =
this;
478 ContextNode *getOrigNode() {
485 unsigned int ContextId);
487 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
488 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
489 void eraseCalleeEdge(
const ContextEdge *Edge);
490 void eraseCallerEdge(
const ContextEdge *Edge);
492 void setCall(CallInfo
C) {
Call =
C; }
494 bool hasCall()
const {
return (
bool)
Call.call(); }
500 bool isRemoved()
const {
536 bool IsBackedge =
false;
543 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
544 ContextIds(std::move(ContextIds)) {}
550 inline void clear() {
560 inline bool isRemoved()
const {
561 if (Callee || Caller)
582 void removeNoneTypeCalleeEdges(ContextNode *
Node);
583 void removeNoneTypeCallerEdges(ContextNode *
Node);
585 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
591 template <
class NodeT,
class IteratorT>
592 std::vector<uint64_t>
597 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
600 template <
class NodeT,
class IteratorT>
601 void addStackNodesForMIB(
605 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
610 void updateStackNodes();
619 void fixupImportantContexts();
623 void handleCallsitesWithMultipleTargets();
626 void markBackedges();
636 bool partitionCallsByCallee(
638 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
645 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
652 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
657 struct CallContextInfo {
661 std::vector<uint64_t> StackIds;
675 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
676 bool CalleeIter =
true);
684 void assignStackNodesPostOrder(
698 void propagateDuplicateContextIds(
704 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
712 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
722 calleesMatch(CallTy
Call, EdgeIter &EI,
727 const FuncTy *getCalleeFunc(CallTy
Call) {
728 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
734 bool calleeMatchesFunc(
735 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
736 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
737 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
738 Call, Func, CallerFunc, FoundCalleeChain);
742 bool sameCallee(CallTy Call1, CallTy Call2) {
743 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
748 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
749 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
755 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
761 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
766 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
771 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
772 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
778 FuncInfo cloneFunctionForCallsite(
780 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
781 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
782 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
787 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
788 unsigned CloneNo)
const {
789 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
793 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
794 CallInfo
C = CallInfo()) {
795 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
796 auto *NewNode = NodeOwner.back().get();
798 NodeToCallingFunc[NewNode] =
F;
799 NewNode->NodeId = NodeOwner.size();
804 ContextNode *getNodeForInst(
const CallInfo &
C);
805 ContextNode *getNodeForAlloc(
const CallInfo &
C);
806 ContextNode *getNodeForStackId(
uint64_t StackId);
828 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
835 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
836 ContextNode *NewCallee,
837 bool NewClone =
false,
845 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
846 ContextNode *NewCaller);
857 void mergeNodeCalleeClones(
862 void findOtherCallersToShareMerge(
863 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
891 struct ImportantContextInfo {
893 std::vector<uint64_t> StackIds;
896 unsigned MaxLength = 0;
900 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
909 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
923 auto Size = StackIds.size();
924 for (
auto Id : Ids) {
925 auto &Entry = ImportantContextIdInfo[Id];
926 Entry.StackIdsToNode[StackIds] =
Node;
928 if (
Size > Entry.MaxLength)
929 Entry.MaxLength =
Size;
938 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
944 unsigned int LastContextId = 0;
947template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
949 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
950template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
952 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
953template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
955 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
956template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
958 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
961class ModuleCallsiteContextGraph
962 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
965 ModuleCallsiteContextGraph(
967 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
970 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
973 uint64_t getStackId(uint64_t IdOrIndex)
const;
975 bool calleeMatchesFunc(
976 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
977 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
978 bool sameCallee(Instruction *Call1, Instruction *Call2);
979 bool findProfiledCalleeThroughTailCalls(
980 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
981 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
982 bool &FoundMultipleCalleeChains);
983 uint64_t getLastStackId(Instruction *
Call);
984 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
987 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
988 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
990 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
991 DenseMap<CallInfo, CallInfo> &CallMap,
992 std::vector<CallInfo> &CallsWithMetadataInFunc,
994 std::string getLabel(
const Function *Func,
const Instruction *
Call,
995 unsigned CloneNo)
const;
998 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1004struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1005 IndexCall() : PointerUnion() {}
1006 IndexCall(std::nullptr_t) : IndexCall() {}
1007 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1008 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1009 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1011 IndexCall *operator->() {
return this; }
1013 void print(raw_ostream &OS)
const {
1014 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1039class IndexCallsiteContextGraph
1040 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1043 IndexCallsiteContextGraph(
1044 ModuleSummaryIndex &Index,
1048 ~IndexCallsiteContextGraph() {
1053 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1055 for (
auto &Callsite :
I.second)
1056 FS->addCallsite(*Callsite.second);
1061 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1064 uint64_t getStackId(uint64_t IdOrIndex)
const;
1065 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1066 bool calleeMatchesFunc(
1067 IndexCall &
Call,
const FunctionSummary *Func,
1068 const FunctionSummary *CallerFunc,
1069 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1070 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1071 bool findProfiledCalleeThroughTailCalls(
1072 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1073 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1074 bool &FoundMultipleCalleeChains);
1075 uint64_t getLastStackId(IndexCall &
Call);
1076 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1079 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1080 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1081 IndexCall>::FuncInfo
1082 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1083 DenseMap<CallInfo, CallInfo> &CallMap,
1084 std::vector<CallInfo> &CallsWithMetadataInFunc,
1086 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1087 unsigned CloneNo)
const;
1088 DenseSet<GlobalValue::GUID> findAliaseeGUIDsPrevailingInDifferentModule();
1092 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1094 const ModuleSummaryIndex &Index;
1102 std::unordered_map<FunctionSummary *,
1103 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1104 FunctionCalleesToSynthesizedCallsiteInfos;
1115 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1118 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1139template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1140bool allocTypesMatch(
1141 const std::vector<uint8_t> &InAllocTypes,
1142 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1146 assert(InAllocTypes.size() == Edges.size());
1148 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1150 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1154 if (l == (uint8_t)AllocationType::None ||
1155 r->AllocTypes == (uint8_t)AllocationType::None)
1157 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1166template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1167bool allocTypesMatchClone(
1168 const std::vector<uint8_t> &InAllocTypes,
1169 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1170 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1174 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1178 for (
const auto &
E : Clone->CalleeEdges) {
1180 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1184 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1185 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1187 if (Iter == EdgeCalleeMap.
end())
1195 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1203template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1204typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1205CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1206 const CallInfo &
C) {
1207 ContextNode *
Node = getNodeForAlloc(
C);
1211 return NonAllocationCallToContextNodeMap.lookup(
C);
1214template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1215typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1216CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1217 const CallInfo &
C) {
1218 return AllocationCallToContextNodeMap.lookup(
C);
1221template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1222typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1223CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1225 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1226 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1227 return StackEntryNode->second;
1231template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1232void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1234 unsigned int ContextId) {
1235 for (
auto &
Edge : CallerEdges) {
1236 if (
Edge->Caller == Caller) {
1238 Edge->getContextIds().insert(ContextId);
1242 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1243 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1244 CallerEdges.push_back(
Edge);
1248template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1249void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1250 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1266 auto CalleeCallerCount =
Callee->CallerEdges.size();
1267 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1272 }
else if (CalleeIter) {
1274 *EI =
Caller->CalleeEdges.erase(*EI);
1277 *EI =
Callee->CallerEdges.erase(*EI);
1279 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1280 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1283template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1284void CallsiteContextGraph<
1285 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1286 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1288 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1290 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1296template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1297void CallsiteContextGraph<
1298 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1299 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1301 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1303 Edge->Caller->eraseCalleeEdge(
Edge.get());
1304 EI =
Node->CallerEdges.erase(EI);
1310template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1311typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1312CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1313 findEdgeFromCallee(
const ContextNode *Callee) {
1314 for (
const auto &
Edge : CalleeEdges)
1315 if (
Edge->Callee == Callee)
1320template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1321typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1322CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1323 findEdgeFromCaller(
const ContextNode *Caller) {
1324 for (
const auto &
Edge : CallerEdges)
1325 if (
Edge->Caller == Caller)
1330template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1331void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1332 eraseCalleeEdge(
const ContextEdge *
Edge) {
1334 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1335 return CalleeEdge.get() ==
Edge;
1337 assert(EI != CalleeEdges.end());
1338 CalleeEdges.erase(EI);
1341template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1342void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1343 eraseCallerEdge(
const ContextEdge *
Edge) {
1345 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1346 return CallerEdge.get() ==
Edge;
1348 assert(EI != CallerEdges.end());
1349 CallerEdges.erase(EI);
1352template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1353uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1354 DenseSet<uint32_t> &ContextIds)
const {
1356 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1357 uint8_t
AllocType = (uint8_t)AllocationType::None;
1358 for (
auto Id : ContextIds) {
1359 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1367template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1369CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1370 const DenseSet<uint32_t> &Node1Ids,
1371 const DenseSet<uint32_t> &Node2Ids)
const {
1373 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1374 uint8_t
AllocType = (uint8_t)AllocationType::None;
1375 for (
auto Id : Node1Ids) {
1376 if (!Node2Ids.
count(Id))
1378 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1386template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1387uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1388 const DenseSet<uint32_t> &Node1Ids,
1389 const DenseSet<uint32_t> &Node2Ids)
const {
1390 if (Node1Ids.
size() < Node2Ids.
size())
1391 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1393 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1396template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1397typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1398CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1399 CallInfo
Call,
const FuncTy *
F) {
1401 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1402 AllocationCallToContextNodeMap[
Call] = AllocNode;
1404 AllocNode->OrigStackOrAllocId = LastContextId;
1407 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1423template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1424template <
class NodeT,
class IteratorT>
1425void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1426 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1429 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1435 ContextIdToAllocationType[++LastContextId] =
AllocType;
1437 bool IsImportant =
false;
1438 if (!ContextSizeInfo.
empty()) {
1439 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1443 uint64_t TotalCold = 0;
1444 for (
auto &CSI : ContextSizeInfo)
1445 TotalCold += CSI.TotalSize;
1451 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1454 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1455 TotalSizeToContextIdTopNCold.erase(
1456 TotalSizeToContextIdTopNCold.begin());
1457 assert(ImportantContextIdInfo.count(IdToRemove));
1458 ImportantContextIdInfo.erase(IdToRemove);
1460 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1464 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1468 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1473 ContextNode *PrevNode = AllocNode;
1477 SmallSet<uint64_t, 8> StackIdSet;
1480 ContextIter != StackContext.
end(); ++ContextIter) {
1481 auto StackId = getStackId(*ContextIter);
1483 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1484 ContextNode *StackNode = getNodeForStackId(StackId);
1486 StackNode = createNewNode(
false);
1487 StackEntryIdToContextNodeMap[StackId] = StackNode;
1488 StackNode->OrigStackOrAllocId = StackId;
1493 auto Ins = StackIdSet.
insert(StackId);
1495 StackNode->Recursive =
true;
1497 StackNode->AllocTypes |= (uint8_t)
AllocType;
1498 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1499 PrevNode = StackNode;
1503template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1505CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1506 const DenseSet<uint32_t> &StackSequenceContextIds,
1507 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1508 DenseSet<uint32_t> NewContextIds;
1509 for (
auto OldId : StackSequenceContextIds) {
1510 NewContextIds.
insert(++LastContextId);
1511 OldToNewContextIds[OldId].insert(LastContextId);
1512 assert(ContextIdToAllocationType.count(OldId));
1514 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1515 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1516 if (CSI != ContextIdToContextSizeInfos.end())
1517 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1518 if (DotAllocContextIds.
contains(OldId))
1519 DotAllocContextIds.
insert(LastContextId);
1521 return NewContextIds;
1524template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1525void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1526 propagateDuplicateContextIds(
1527 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1529 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1530 DenseSet<uint32_t> NewIds;
1531 for (
auto Id : ContextIds)
1532 if (
auto NewId = OldToNewContextIds.find(Id);
1533 NewId != OldToNewContextIds.end())
1539 auto UpdateCallers = [&](ContextNode *
Node,
1540 DenseSet<const ContextEdge *> &Visited,
1541 auto &&UpdateCallers) ->
void {
1542 for (
const auto &
Edge :
Node->CallerEdges) {
1546 ContextNode *NextNode =
Edge->Caller;
1547 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1550 if (!NewIdsToAdd.
empty()) {
1551 Edge->getContextIds().insert_range(NewIdsToAdd);
1552 UpdateCallers(NextNode, Visited, UpdateCallers);
1557 DenseSet<const ContextEdge *> Visited;
1558 for (
auto &Entry : AllocationCallToContextNodeMap) {
1560 UpdateCallers(Node, Visited, UpdateCallers);
1564template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1565void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1566 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1569 DenseSet<uint32_t> RemainingContextIds) {
1571 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1572 DenseSet<uint32_t> RecursiveContextIds;
1573 DenseSet<uint32_t> AllCallerContextIds;
1578 for (
auto &CE : OrigEdges) {
1579 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1580 for (
auto Id :
CE->getContextIds())
1581 if (!AllCallerContextIds.
insert(Id).second)
1582 RecursiveContextIds.
insert(Id);
1586 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1588 DenseSet<uint32_t> NewEdgeContextIds;
1589 DenseSet<uint32_t> NotFoundContextIds;
1593 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1594 NotFoundContextIds);
1597 if (RecursiveContextIds.
empty()) {
1600 RemainingContextIds.
swap(NotFoundContextIds);
1610 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1612 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1615 if (NewEdgeContextIds.
empty()) {
1619 if (TowardsCallee) {
1620 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1621 auto NewEdge = std::make_shared<ContextEdge>(
1622 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1623 NewNode->CalleeEdges.push_back(NewEdge);
1624 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1626 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1627 auto NewEdge = std::make_shared<ContextEdge>(
1628 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1629 NewNode->CallerEdges.push_back(NewEdge);
1630 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1633 if (
Edge->getContextIds().empty()) {
1634 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1641template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1643 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1647 assert(!Edge->ContextIds.empty());
1650template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1652 bool CheckEdges =
true) {
1653 if (
Node->isRemoved())
1657 auto NodeContextIds =
Node->getContextIds();
1661 if (
Node->CallerEdges.size()) {
1663 Node->CallerEdges.front()->ContextIds);
1667 set_union(CallerEdgeContextIds, Edge->ContextIds);
1674 NodeContextIds == CallerEdgeContextIds ||
1677 if (
Node->CalleeEdges.size()) {
1679 Node->CalleeEdges.front()->ContextIds);
1683 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1689 NodeContextIds == CalleeEdgeContextIds);
1698 for (
const auto &
E :
Node->CalleeEdges)
1704template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1705void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1706 assignStackNodesPostOrder(ContextNode *Node,
1707 DenseSet<const ContextNode *> &Visited,
1708 DenseMap<uint64_t, std::vector<CallContextInfo>>
1709 &StackIdToMatchingCalls,
1710 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1711 const DenseSet<uint32_t> &ImportantContextIds) {
1719 auto CallerEdges =
Node->CallerEdges;
1720 for (
auto &
Edge : CallerEdges) {
1722 if (
Edge->isRemoved()) {
1726 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1727 CallToMatchingCall, ImportantContextIds);
1736 if (
Node->IsAllocation ||
1737 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1740 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1744 if (Calls.size() == 1) {
1745 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1746 if (Ids.size() == 1) {
1747 assert(SavedContextIds.empty());
1749 assert(Node == getNodeForStackId(Ids[0]));
1750 if (
Node->Recursive)
1753 NonAllocationCallToContextNodeMap[
Call] =
Node;
1755 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1763 uint64_t LastId =
Node->OrigStackOrAllocId;
1764 ContextNode *LastNode = getNodeForStackId(LastId);
1767 assert(LastNode == Node);
1769 ContextNode *LastNode =
Node;
1774 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1776 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1777 bool CreatedNode =
false;
1778 for (
unsigned I = 0;
I < Calls.size();
1779 I++, PrevIterCreatedNode = CreatedNode) {
1780 CreatedNode =
false;
1781 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1784 if (SavedContextIds.empty()) {
1791 auto MatchingCall = CallToMatchingCall[
Call];
1792 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1796 assert(
I > 0 && !PrevIterCreatedNode);
1799 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1804 assert(LastId == Ids.back());
1813 ContextNode *PrevNode = LastNode;
1817 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1819 ContextNode *CurNode = getNodeForStackId(Id);
1823 assert(!CurNode->Recursive);
1825 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1837 if (SavedContextIds.empty()) {
1846 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1847 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1849 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1851 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1857 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1862 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1867 for (
auto Id : Ids) {
1868 ContextNode *CurNode = getNodeForStackId(Id);
1875 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1882 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1883 if (PrevEdge->getContextIds().empty())
1884 removeEdgeFromGraph(PrevEdge);
1889 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1890 ? (uint8_t)AllocationType::None
1891 : CurNode->computeAllocType();
1895 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1899 for (
auto Id : Ids) {
1900 ContextNode *CurNode = getNodeForStackId(Id);
1909template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1910void CallsiteContextGraph<DerivedCCG, FuncTy,
1911 CallTy>::fixupImportantContexts() {
1912 if (ImportantContextIdInfo.empty())
1916 NumImportantContextIds = ImportantContextIdInfo.size();
1922 exportToDot(
"beforestackfixup");
1947 for (
auto &[CurContextId,
Info] : ImportantContextIdInfo) {
1948 if (
Info.StackIdsToNode.empty())
1951 ContextNode *PrevNode =
nullptr;
1952 ContextNode *CurNode =
nullptr;
1953 DenseSet<const ContextEdge *> VisitedEdges;
1954 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1957 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1961 auto LenToEnd = AllStackIds.size() -
I;
1969 auto CheckStackIds = AllStackIds.slice(
I, Len);
1970 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1971 if (EntryIt ==
Info.StackIdsToNode.end())
1973 CurNode = EntryIt->second;
1990 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1993 if (CurEdge->getContextIds().insert(CurContextId).second) {
1994 NumFixupEdgeIdsInserted++;
1999 NumFixupEdgesAdded++;
2000 DenseSet<uint32_t> ContextIds({CurContextId});
2001 auto AllocType = computeAllocType(ContextIds);
2002 auto NewEdge = std::make_shared<ContextEdge>(
2003 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2004 PrevNode->CallerEdges.push_back(NewEdge);
2005 CurNode->CalleeEdges.push_back(NewEdge);
2007 CurEdge = NewEdge.get();
2010 VisitedEdges.
insert(CurEdge);
2013 for (
auto &
Edge : PrevNode->CallerEdges) {
2017 Edge->getContextIds().erase(CurContextId);
2025template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2026void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2034 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2035 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2036 for (
auto &
Call : CallsWithMetadata) {
2038 if (AllocationCallToContextNodeMap.count(
Call))
2040 auto StackIdsWithContextNodes =
2041 getStackIdsWithContextNodesForCall(
Call.call());
2044 if (StackIdsWithContextNodes.empty())
2048 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2049 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2059 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2063 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2064 for (
auto &It : StackIdToMatchingCalls) {
2065 auto &Calls = It.getSecond();
2067 if (Calls.size() == 1) {
2068 auto &Ids = Calls[0].StackIds;
2069 if (Ids.size() == 1)
2082 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2083 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2084 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2087 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2088 return A.StackIds.size() >
B.StackIds.size() ||
2089 (
A.StackIds.size() ==
B.StackIds.size() &&
2090 (
A.StackIds <
B.StackIds ||
2091 (
A.StackIds ==
B.StackIds &&
2092 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2098 uint64_t LastId = It.getFirst();
2099 ContextNode *LastNode = getNodeForStackId(LastId);
2103 if (LastNode->Recursive)
2108 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2116 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2119 for (
unsigned I = 0;
I < Calls.size();
I++) {
2120 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2121 assert(SavedContextIds.empty());
2122 assert(LastId == Ids.back());
2127 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2128 MatchingIdsFuncSet.
clear();
2135 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2137 ContextNode *PrevNode = LastNode;
2138 ContextNode *CurNode = LastNode;
2143 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2145 CurNode = getNodeForStackId(Id);
2149 if (CurNode->Recursive) {
2154 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2175 if (StackSequenceContextIds.
empty()) {
2188 if (Ids.back() != getLastStackId(
Call)) {
2189 for (
const auto &PE : LastNode->CallerEdges) {
2190 set_subtract(StackSequenceContextIds, PE->getContextIds());
2191 if (StackSequenceContextIds.
empty())
2195 if (StackSequenceContextIds.
empty())
2207 MatchingIdsFuncSet.
insert(Func);
2214 bool DuplicateContextIds =
false;
2215 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2216 auto &CallCtxInfo = Calls[J];
2217 auto &NextIds = CallCtxInfo.StackIds;
2220 auto *NextFunc = CallCtxInfo.Func;
2221 if (NextFunc != Func) {
2224 DuplicateContextIds =
true;
2227 auto &NextCall = CallCtxInfo.Call;
2228 CallToMatchingCall[NextCall] =
Call;
2239 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2240 StackSequenceContextIds.
size());
2243 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2244 : StackSequenceContextIds;
2245 assert(!SavedContextIds.empty());
2247 if (!DuplicateContextIds) {
2251 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2252 if (LastNodeContextIds.
empty())
2259 propagateDuplicateContextIds(OldToNewContextIds);
2269 DenseSet<const ContextNode *> Visited;
2271 ImportantContextIdInfo.keys());
2272 for (
auto &Entry : AllocationCallToContextNodeMap)
2273 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2274 CallToMatchingCall, ImportantContextIds);
2276 fixupImportantContexts();
2282uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2283 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2285 return CallsiteContext.
back();
2288uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2290 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2293 return Index.getStackIdAtIndex(CallsiteContext.
back());
2315 auto Pos =
F.getName().find_last_of(
'.');
2318 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2324std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2325 const Instruction *
Call,
2326 unsigned CloneNo)
const {
2332std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2333 const IndexCall &
Call,
2334 unsigned CloneNo)
const {
2335 auto VI = FSToVIMap.find(Func);
2336 assert(VI != FSToVIMap.end());
2339 return CallerName +
" -> alloc";
2342 return CallerName +
" -> " +
2344 Callsite->Clones[CloneNo]);
2348std::vector<uint64_t>
2349ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2350 Instruction *
Call) {
2351 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2353 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2357std::vector<uint64_t>
2358IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2360 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2362 return getStackIdsWithContextNodes<CallsiteInfo,
2363 SmallVector<unsigned>::const_iterator>(
2367template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2368template <
class NodeT,
class IteratorT>
2369std::vector<uint64_t>
2370CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2371 CallStack<NodeT, IteratorT> &CallsiteContext) {
2372 std::vector<uint64_t> StackIds;
2373 for (
auto IdOrIndex : CallsiteContext) {
2374 auto StackId = getStackId(IdOrIndex);
2375 ContextNode *
Node = getNodeForStackId(StackId);
2378 StackIds.push_back(StackId);
2383ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2385 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2386 :
Mod(
M), OREGetter(OREGetter) {
2390 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2392 std::vector<CallInfo> CallsWithMetadata;
2393 for (
auto &BB :
F) {
2394 for (
auto &
I : BB) {
2397 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2398 CallsWithMetadata.push_back(&
I);
2399 auto *AllocNode = addAllocNode(&
I, &
F);
2400 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2404 for (
auto &MDOp : MemProfMD->operands()) {
2406 std::vector<ContextTotalSize> ContextSizeInfo;
2408 if (MIBMD->getNumOperands() > 2) {
2409 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2410 MDNode *ContextSizePair =
2419 ContextSizeInfo.push_back({FullStackId, TotalSize});
2425 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2426 AllocNode, StackContext, CallsiteContext,
2428 TotalSizeToContextIdTopNCold);
2433 DotAllocContextIds = AllocNode->getContextIds();
2437 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2438 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2441 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2442 CallsWithMetadata.push_back(&
I);
2446 if (!CallsWithMetadata.empty())
2447 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2451 dbgs() <<
"CCG before updating call stack chains:\n";
2456 exportToDot(
"prestackupdate");
2461 exportToDot(
"poststackupdate");
2463 handleCallsitesWithMultipleTargets();
2468 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2469 for (
auto &
Call : FuncEntry.second)
2470 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2476IndexCallsiteContextGraph::findAliaseeGUIDsPrevailingInDifferentModule() {
2477 DenseSet<GlobalValue::GUID> AliaseeGUIDs;
2478 for (
auto &
I : Index) {
2480 for (
auto &S :
VI.getSummaryList()) {
2485 auto *AliaseeSummary = &AS->getAliasee();
2493 !isPrevailing(
VI.getGUID(), S.get()))
2498 auto AliaseeGUID = AS->getAliaseeGUID();
2500 if (!isPrevailing(AliaseeGUID, AliaseeSummary))
2501 AliaseeGUIDs.
insert(AliaseeGUID);
2504 AliaseesPrevailingInDiffModuleFromAlias += AliaseeGUIDs.
size();
2505 return AliaseeGUIDs;
2508IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2509 ModuleSummaryIndex &Index,
2519 findAliaseeGUIDsPrevailingInDifferentModule();
2523 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2524 for (
auto &
I : Index) {
2525 auto VI = Index.getValueInfo(
I);
2526 if (GUIDsToSkip.
contains(VI.getGUID()))
2528 for (
auto &S : VI.getSummaryList()) {
2537 !isPrevailing(VI.getGUID(), S.get()))
2542 std::vector<CallInfo> CallsWithMetadata;
2543 if (!
FS->allocs().empty()) {
2544 for (
auto &AN :
FS->mutableAllocs()) {
2549 if (AN.MIBs.empty())
2551 IndexCall AllocCall(&AN);
2552 CallsWithMetadata.push_back(AllocCall);
2553 auto *AllocNode = addAllocNode(AllocCall, FS);
2561 AN.ContextSizeInfos.size() == AN.MIBs.size());
2563 for (
auto &MIB : AN.MIBs) {
2566 std::vector<ContextTotalSize> ContextSizeInfo;
2567 if (!AN.ContextSizeInfos.empty()) {
2568 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2569 ContextSizeInfo.push_back({FullStackId, TotalSize});
2571 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2572 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2573 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2579 DotAllocContextIds = AllocNode->getContextIds();
2585 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2589 if (!
FS->callsites().empty())
2590 for (
auto &SN :
FS->mutableCallsites()) {
2591 IndexCall StackNodeCall(&SN);
2592 CallsWithMetadata.push_back(StackNodeCall);
2595 if (!CallsWithMetadata.empty())
2596 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2598 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2604 dbgs() <<
"CCG before updating call stack chains:\n";
2609 exportToDot(
"prestackupdate");
2614 exportToDot(
"poststackupdate");
2616 handleCallsitesWithMultipleTargets();
2621template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2622void CallsiteContextGraph<DerivedCCG, FuncTy,
2623 CallTy>::handleCallsitesWithMultipleTargets() {
2638 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2639 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2640 auto *
Node = Entry.second;
2649 std::vector<CallInfo> AllCalls;
2650 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2651 AllCalls.push_back(
Node->Call);
2665 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2668 auto It = AllCalls.begin();
2670 for (; It != AllCalls.end(); ++It) {
2673 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2676 if (!Edge->Callee->hasCall())
2678 assert(NodeToCallingFunc.count(Edge->Callee));
2680 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2689 if (
Node->Call != ThisCall) {
2690 Node->setCall(ThisCall);
2701 Node->MatchingCalls.clear();
2704 if (It == AllCalls.end()) {
2705 RemovedEdgesWithMismatchedCallees++;
2709 Node->setCall(CallInfo());
2714 for (++It; It != AllCalls.end(); ++It) {
2718 Node->MatchingCalls.push_back(ThisCall);
2727 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2728 return !it.second->hasCall() || it.second->Call != it.first;
2732 for (
auto &[
Call,
Node] : NewCallToNode)
2733 NonAllocationCallToContextNodeMap[
Call] =
Node;
2737 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2738 NonAllocationCallToContextNodeMap[
Call] =
Node;
2741template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2742bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2744 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2748 struct CallsWithSameCallee {
2749 std::vector<CallInfo> Calls;
2750 ContextNode *
Node =
nullptr;
2756 for (
auto ThisCall : AllCalls) {
2757 auto *
F = getCalleeFunc(
ThisCall.call());
2759 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2768 for (
const auto &Edge :
Node->CalleeEdges) {
2769 if (!Edge->Callee->hasCall())
2771 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2772 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2773 CalleeNodeToCallInfo[Edge->Callee] =
2774 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2780 if (CalleeNodeToCallInfo.
empty())
2792 ContextNode *UnmatchedCalleesNode =
nullptr;
2794 bool UsedOrigNode =
false;
2799 auto CalleeEdges =
Node->CalleeEdges;
2800 for (
auto &Edge : CalleeEdges) {
2801 if (!Edge->Callee->hasCall())
2806 ContextNode *CallerNodeToUse =
nullptr;
2810 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2811 if (!UnmatchedCalleesNode)
2812 UnmatchedCalleesNode =
2813 createNewNode(
false, NodeToCallingFunc[
Node]);
2814 CallerNodeToUse = UnmatchedCalleesNode;
2818 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2821 if (!UsedOrigNode) {
2824 Node->MatchingCalls.clear();
2825 UsedOrigNode =
true;
2828 createNewNode(
false, NodeToCallingFunc[
Node]);
2832 Info->Node->setCall(
Info->Calls.front());
2838 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2840 CallerNodeToUse =
Info->Node;
2844 if (CallerNodeToUse ==
Node)
2847 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2854 for (
auto &
I : CalleeNodeToCallInfo)
2855 removeNoneTypeCallerEdges(
I.second->Node);
2856 if (UnmatchedCalleesNode)
2857 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2858 removeNoneTypeCallerEdges(
Node);
2868uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2871 return Index.getStackIdAtIndex(IdOrIndex);
2874template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2875bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2876 CallTy
Call, EdgeIter &EI,
2877 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2879 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2880 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2883 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2884 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2889 if (FoundCalleeChain.empty())
2893 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2897 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2898 CurEdge->AllocTypes |=
Edge->AllocTypes;
2903 auto NewEdge = std::make_shared<ContextEdge>(
2904 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2905 Callee->CallerEdges.push_back(NewEdge);
2906 if (Caller ==
Edge->Caller) {
2910 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2913 "Iterator position not restored after insert and increment");
2915 Caller->CalleeEdges.push_back(NewEdge);
2920 auto *CurCalleeNode =
Edge->Callee;
2921 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2922 ContextNode *NewNode =
nullptr;
2924 if (TailCallToContextNodeMap.
count(NewCall)) {
2925 NewNode = TailCallToContextNodeMap[NewCall];
2926 NewNode->AllocTypes |=
Edge->AllocTypes;
2928 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2930 NewNode = createNewNode(
false, Func, NewCall);
2931 TailCallToContextNodeMap[NewCall] = NewNode;
2932 NewNode->AllocTypes =
Edge->AllocTypes;
2936 AddEdge(NewNode, CurCalleeNode);
2938 CurCalleeNode = NewNode;
2942 AddEdge(
Edge->Caller, CurCalleeNode);
2950 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2962bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2963 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2964 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2965 bool &FoundMultipleCalleeChains) {
2972 FoundCalleeChain.push_back({Callsite,
F});
2987 bool FoundSingleCalleeChain =
false;
2988 for (
auto &BB : *CalleeFunc) {
2989 for (
auto &
I : BB) {
2991 if (!CB || !CB->isTailCall())
2993 auto *CalledValue = CB->getCalledOperand();
2994 auto *CalledFunction = CB->getCalledFunction();
2995 if (CalledValue && !CalledFunction) {
2996 CalledValue = CalledValue->stripPointerCasts();
3003 assert(!CalledFunction &&
3004 "Expected null called function in callsite for alias");
3007 if (!CalledFunction)
3009 if (CalledFunction == ProfiledCallee) {
3010 if (FoundSingleCalleeChain) {
3011 FoundMultipleCalleeChains =
true;
3014 FoundSingleCalleeChain =
true;
3015 FoundProfiledCalleeCount++;
3016 FoundProfiledCalleeDepth +=
Depth;
3017 if (
Depth > FoundProfiledCalleeMaxDepth)
3018 FoundProfiledCalleeMaxDepth =
Depth;
3019 SaveCallsiteInfo(&
I, CalleeFunc);
3020 }
else if (findProfiledCalleeThroughTailCalls(
3021 ProfiledCallee, CalledFunction,
Depth + 1,
3022 FoundCalleeChain, FoundMultipleCalleeChains)) {
3025 assert(!FoundMultipleCalleeChains);
3026 if (FoundSingleCalleeChain) {
3027 FoundMultipleCalleeChains =
true;
3030 FoundSingleCalleeChain =
true;
3031 SaveCallsiteInfo(&
I, CalleeFunc);
3032 }
else if (FoundMultipleCalleeChains)
3037 return FoundSingleCalleeChain;
3040const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
3042 if (!CB->getCalledOperand() || CB->isIndirectCall())
3044 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3051bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3052 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3053 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3055 if (!CB->getCalledOperand() || CB->isIndirectCall())
3057 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3059 if (CalleeFunc == Func)
3062 if (Alias && Alias->getAliasee() == Func)
3073 bool FoundMultipleCalleeChains =
false;
3074 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3076 FoundMultipleCalleeChains)) {
3077 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3078 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3079 <<
" that actually called " << CalleeVal->getName()
3080 << (FoundMultipleCalleeChains
3081 ?
" (found multiple possible chains)"
3084 if (FoundMultipleCalleeChains)
3085 FoundProfiledCalleeNonUniquelyCount++;
3092bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3093 Instruction *Call2) {
3095 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3097 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3100 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3102 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3104 return CalleeFunc1 == CalleeFunc2;
3107bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3108 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3109 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3110 bool &FoundMultipleCalleeChains) {
3116 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3119 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3120 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3123 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3124 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3125 CallsiteInfo *NewCallsiteInfo =
3126 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3127 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3134 bool FoundSingleCalleeChain =
false;
3137 !isPrevailing(CurCallee.
getGUID(), S.get()))
3142 auto FSVI = CurCallee;
3145 FSVI = AS->getAliaseeVI();
3146 for (
auto &CallEdge :
FS->calls()) {
3147 if (!CallEdge.second.hasTailCall())
3149 if (CallEdge.first == ProfiledCallee) {
3150 if (FoundSingleCalleeChain) {
3151 FoundMultipleCalleeChains =
true;
3154 FoundSingleCalleeChain =
true;
3155 FoundProfiledCalleeCount++;
3156 FoundProfiledCalleeDepth +=
Depth;
3157 if (
Depth > FoundProfiledCalleeMaxDepth)
3158 FoundProfiledCalleeMaxDepth =
Depth;
3159 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3161 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3162 FSToVIMap[
FS] = FSVI;
3163 }
else if (findProfiledCalleeThroughTailCalls(
3164 ProfiledCallee, CallEdge.first,
Depth + 1,
3165 FoundCalleeChain, FoundMultipleCalleeChains)) {
3168 assert(!FoundMultipleCalleeChains);
3169 if (FoundSingleCalleeChain) {
3170 FoundMultipleCalleeChains =
true;
3173 FoundSingleCalleeChain =
true;
3174 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3176 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3177 FSToVIMap[
FS] = FSVI;
3178 }
else if (FoundMultipleCalleeChains)
3183 return FoundSingleCalleeChain;
3186const FunctionSummary *
3187IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3189 if (
Callee.getSummaryList().empty())
3194bool IndexCallsiteContextGraph::calleeMatchesFunc(
3195 IndexCall &
Call,
const FunctionSummary *Func,
3196 const FunctionSummary *CallerFunc,
3197 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3201 AliasSummary *Alias =
3202 Callee.getSummaryList().empty()
3205 assert(FSToVIMap.count(Func));
3206 auto FuncVI = FSToVIMap[
Func];
3207 if (Callee == FuncVI ||
3222 bool FoundMultipleCalleeChains =
false;
3223 if (!findProfiledCalleeThroughTailCalls(
3224 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3225 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3226 <<
" from " << FSToVIMap[CallerFunc]
3227 <<
" that actually called " << Callee
3228 << (FoundMultipleCalleeChains
3229 ?
" (found multiple possible chains)"
3232 if (FoundMultipleCalleeChains)
3233 FoundProfiledCalleeNonUniquelyCount++;
3240bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3243 return Callee1 == Callee2;
3246template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3247void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3253template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3254void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3255 raw_ostream &OS)
const {
3256 OS <<
"Node " <<
this <<
"\n";
3260 OS <<
" (recursive)";
3262 if (!MatchingCalls.empty()) {
3263 OS <<
"\tMatchingCalls:\n";
3264 for (
auto &MatchingCall : MatchingCalls) {
3266 MatchingCall.print(OS);
3270 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3272 OS <<
"\tContextIds:";
3274 auto ContextIds = getContextIds();
3275 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3276 std::sort(SortedIds.begin(), SortedIds.end());
3277 for (
auto Id : SortedIds)
3280 OS <<
"\tCalleeEdges:\n";
3281 for (
auto &
Edge : CalleeEdges)
3282 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3284 OS <<
"\tCallerEdges:\n";
3285 for (
auto &
Edge : CallerEdges)
3286 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3288 if (!Clones.empty()) {
3291 for (
auto *
C : Clones)
3292 OS <<
LS <<
C <<
" NodeId: " <<
C->NodeId;
3294 }
else if (CloneOf) {
3295 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3299template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3300void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3306template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3307void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3308 raw_ostream &OS)
const {
3309 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3310 << (IsBackedge ?
" (BE)" :
"")
3312 OS <<
" ContextIds:";
3313 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3314 std::sort(SortedIds.begin(), SortedIds.end());
3315 for (
auto Id : SortedIds)
3319template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3320void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3324template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3325void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3326 raw_ostream &OS)
const {
3327 OS <<
"Callsite Context Graph:\n";
3328 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3330 if (
Node->isRemoved())
3337template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3338void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3339 raw_ostream &OS)
const {
3340 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3342 if (
Node->isRemoved())
3344 if (!
Node->IsAllocation)
3346 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3347 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3348 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3349 std::sort(SortedIds.begin(), SortedIds.end());
3350 for (
auto Id : SortedIds) {
3351 auto TypeI = ContextIdToAllocationType.find(Id);
3352 assert(TypeI != ContextIdToAllocationType.end());
3353 auto CSI = ContextIdToContextSizeInfos.find(Id);
3354 if (CSI != ContextIdToContextSizeInfos.end()) {
3355 for (
auto &
Info : CSI->second) {
3356 OS <<
"MemProf hinting: "
3358 <<
" full allocation context " <<
Info.FullStackId
3359 <<
" with total size " <<
Info.TotalSize <<
" is "
3361 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3363 <<
" due to cold byte percent";
3365 OS <<
" (context id " <<
Id <<
")";
3373template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3374void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3375 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3378 for (
auto &
Edge :
Node->CallerEdges)
3383template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3385 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3386 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3388 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3404 return G->NodeOwner.begin()->get();
3407 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3408 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3427template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3441 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3447 std::string LabelString =
3448 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3451 LabelString +=
"\n";
3452 if (
Node->hasCall()) {
3453 auto Func =
G->NodeToCallingFunc.find(
Node);
3454 assert(Func !=
G->NodeToCallingFunc.end());
3456 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3457 for (
auto &MatchingCall :
Node->MatchingCalls) {
3458 LabelString +=
"\n";
3459 LabelString +=
G->getLabel(Func->second, MatchingCall.call(),
3460 MatchingCall.cloneNo());
3463 LabelString +=
"null call";
3464 if (
Node->Recursive)
3465 LabelString +=
" (recursive)";
3467 LabelString +=
" (external)";
3473 auto ContextIds =
Node->getContextIds();
3477 bool Highlight =
false;
3486 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3487 getContextIds(ContextIds) +
"\"")
3491 AttributeString +=
",fontsize=\"30\"";
3493 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3495 if (
Node->CloneOf) {
3496 AttributeString +=
",color=\"blue\"";
3497 AttributeString +=
",style=\"filled,bold,dashed\"";
3499 AttributeString +=
",style=\"filled\"";
3500 return AttributeString;
3505 auto &Edge = *(ChildIter.getCurrent());
3510 bool Highlight =
false;
3519 auto Color = getColor(Edge->AllocTypes, Highlight);
3520 std::string AttributeString =
3521 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3523 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3526 if (Edge->IsBackedge)
3527 AttributeString +=
",style=\"dotted\"";
3530 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3531 return AttributeString;
3537 if (
Node->isRemoved())
3550 std::string IdString =
"ContextIds:";
3551 if (ContextIds.
size() < 100) {
3552 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3553 std::sort(SortedIds.begin(), SortedIds.end());
3554 for (
auto Id : SortedIds)
3555 IdString += (
" " +
Twine(Id)).str();
3557 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3562 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3568 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3570 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3571 if (AllocTypes == (uint8_t)AllocationType::Cold)
3572 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3574 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3575 return Highlight ?
"magenta" :
"mediumorchid1";
3579 static std::string getNodeId(NodeRef Node) {
3580 std::stringstream SStream;
3581 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3582 std::string
Result = SStream.str();
3591template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3596template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3597void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3598 std::string Label)
const {
3603template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3604typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3605CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3606 const std::shared_ptr<ContextEdge> &
Edge,
3607 DenseSet<uint32_t> ContextIdsToMove) {
3609 assert(NodeToCallingFunc.count(Node));
3610 ContextNode *Clone =
3611 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3612 Node->addClone(Clone);
3613 Clone->MatchingCalls =
Node->MatchingCalls;
3614 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3619template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3620void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3621 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3622 ContextNode *NewCallee,
bool NewClone,
3623 DenseSet<uint32_t> ContextIdsToMove) {
3626 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3628 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3630 ContextNode *OldCallee =
Edge->Callee;
3634 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3638 if (ContextIdsToMove.
empty())
3639 ContextIdsToMove =
Edge->getContextIds();
3643 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3646 NewCallee->AllocTypes |=
Edge->AllocTypes;
3648 if (ExistingEdgeToNewCallee) {
3651 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3652 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3653 assert(
Edge->ContextIds == ContextIdsToMove);
3654 removeEdgeFromGraph(
Edge.get());
3657 Edge->Callee = NewCallee;
3658 NewCallee->CallerEdges.push_back(
Edge);
3660 OldCallee->eraseCallerEdge(
Edge.get());
3667 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3668 if (ExistingEdgeToNewCallee) {
3671 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3672 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3675 auto NewEdge = std::make_shared<ContextEdge>(
3676 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3677 Edge->Caller->CalleeEdges.push_back(NewEdge);
3678 NewCallee->CallerEdges.push_back(NewEdge);
3682 NewCallee->AllocTypes |= CallerEdgeAllocType;
3684 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3689 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3690 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3694 if (CalleeToUse == OldCallee) {
3698 if (EdgeIsRecursive) {
3702 CalleeToUse = NewCallee;
3706 DenseSet<uint32_t> EdgeContextIdsToMove =
3708 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3709 OldCalleeEdge->AllocTypes =
3710 computeAllocType(OldCalleeEdge->getContextIds());
3717 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3718 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3719 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3723 auto NewEdge = std::make_shared<ContextEdge>(
3724 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3725 EdgeContextIdsToMove);
3726 NewCallee->CalleeEdges.push_back(NewEdge);
3727 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3731 OldCallee->AllocTypes = OldCallee->computeAllocType();
3733 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3734 OldCallee->emptyContextIds());
3738 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3741 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3747template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3748void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3749 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3750 ContextNode *NewCaller) {
3751 auto *OldCallee =
Edge->Callee;
3752 auto *NewCallee = OldCallee;
3755 bool Recursive =
Edge->Caller ==
Edge->Callee;
3757 NewCallee = NewCaller;
3759 ContextNode *OldCaller =
Edge->Caller;
3760 OldCaller->eraseCalleeEdge(
Edge.get());
3764 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3766 if (ExistingEdgeToNewCaller) {
3769 ExistingEdgeToNewCaller->getContextIds().insert_range(
3770 Edge->getContextIds());
3771 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3772 Edge->ContextIds.clear();
3773 Edge->AllocTypes = (uint8_t)AllocationType::None;
3774 OldCallee->eraseCallerEdge(
Edge.get());
3777 Edge->Caller = NewCaller;
3778 NewCaller->CalleeEdges.push_back(
Edge);
3780 assert(NewCallee == NewCaller);
3783 Edge->Callee = NewCallee;
3784 NewCallee->CallerEdges.push_back(
Edge);
3785 OldCallee->eraseCallerEdge(
Edge.get());
3791 NewCaller->AllocTypes |=
Edge->AllocTypes;
3798 bool IsNewNode = NewCaller->CallerEdges.empty();
3807 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3808 auto OldCallerCaller = OldCallerEdge->Caller;
3812 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3813 if (OldCaller == OldCallerCaller) {
3814 OldCallerCaller = NewCaller;
3820 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3821 OldCallerEdge->AllocTypes =
3822 computeAllocType(OldCallerEdge->getContextIds());
3827 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3831 if (ExistingCallerEdge) {
3832 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3833 ExistingCallerEdge->AllocTypes |=
3834 computeAllocType(EdgeContextIdsToMove);
3837 auto NewEdge = std::make_shared<ContextEdge>(
3838 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3839 EdgeContextIdsToMove);
3840 NewCaller->CallerEdges.push_back(NewEdge);
3841 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3846 OldCaller->AllocTypes = OldCaller->computeAllocType();
3848 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3849 OldCaller->emptyContextIds());
3853 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3856 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3862template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3863void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3864 recursivelyRemoveNoneTypeCalleeEdges(
3865 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3870 removeNoneTypeCalleeEdges(Node);
3872 for (
auto *Clone :
Node->Clones)
3873 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3877 auto CallerEdges =
Node->CallerEdges;
3878 for (
auto &
Edge : CallerEdges) {
3880 if (
Edge->isRemoved()) {
3884 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3889template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3890void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3895 DenseSet<const ContextNode *> Visited;
3896 DenseSet<const ContextNode *> CurrentStack;
3897 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3899 if (
Node->isRemoved())
3902 if (!
Node->CallerEdges.empty())
3904 markBackedges(Node, Visited, CurrentStack);
3910template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3911void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3912 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3913 DenseSet<const ContextNode *> &CurrentStack) {
3914 auto I = Visited.
insert(Node);
3918 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3919 auto *
Callee = CalleeEdge->Callee;
3920 if (Visited.
count(Callee)) {
3923 if (CurrentStack.
count(Callee))
3924 CalleeEdge->IsBackedge =
true;
3927 CurrentStack.
insert(Callee);
3928 markBackedges(Callee, Visited, CurrentStack);
3929 CurrentStack.
erase(Callee);
3933template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3934void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3935 DenseSet<const ContextNode *> Visited;
3936 for (
auto &Entry : AllocationCallToContextNodeMap) {
3938 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3941 for (
auto &Entry : AllocationCallToContextNodeMap)
3942 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3955template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3956void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3957 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3958 const DenseSet<uint32_t> &AllocContextIds) {
3968 if (!
Node->hasCall())
3987 auto CallerEdges =
Node->CallerEdges;
3988 for (
auto &
Edge : CallerEdges) {
3990 if (
Edge->isRemoved()) {
3996 if (
Edge->IsBackedge) {
4003 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
4004 identifyClones(
Edge->Caller, Visited, AllocContextIds);
4027 const unsigned AllocTypeCloningPriority[] = { 3, 4,
4031 [&](
const std::shared_ptr<ContextEdge> &
A,
4032 const std::shared_ptr<ContextEdge> &
B) {
4035 if (A->ContextIds.empty())
4041 if (B->ContextIds.empty())
4044 if (A->AllocTypes == B->AllocTypes)
4047 return *A->ContextIds.begin() < *B->ContextIds.begin();
4048 return AllocTypeCloningPriority[A->AllocTypes] <
4049 AllocTypeCloningPriority[B->AllocTypes];
4052 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4054 DenseSet<uint32_t> RecursiveContextIds;
4059 DenseSet<uint32_t> AllCallerContextIds;
4060 for (
auto &CE :
Node->CallerEdges) {
4063 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4064 for (
auto Id :
CE->getContextIds())
4065 if (!AllCallerContextIds.
insert(Id).second)
4066 RecursiveContextIds.
insert(Id);
4076 auto CallerEdges =
Node->CallerEdges;
4077 for (
auto &CallerEdge : CallerEdges) {
4079 if (CallerEdge->isRemoved()) {
4083 assert(CallerEdge->Callee == Node);
4092 if (!CallerEdge->Caller->hasCall())
4097 auto CallerEdgeContextsForAlloc =
4099 if (!RecursiveContextIds.
empty())
4100 CallerEdgeContextsForAlloc =
4102 if (CallerEdgeContextsForAlloc.empty())
4105 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4109 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4110 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4111 for (
auto &CalleeEdge :
Node->CalleeEdges)
4112 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4113 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4129 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4130 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4131 if (!CallerEdge->IsBackedge &&
4132 allocTypeToUse(CallerAllocTypeForAlloc) ==
4133 allocTypeToUse(
Node->AllocTypes) &&
4134 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4135 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4139 if (CallerEdge->IsBackedge) {
4143 DeferredBackedges++;
4156 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4157 !Visited.
count(CallerEdge->Caller)) {
4158 const auto OrigIdCount = CallerEdge->getContextIds().size();
4161 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4162 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4166 bool UpdatedEdge =
false;
4167 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4168 for (
auto E :
Node->CallerEdges) {
4170 if (
E->Caller->CloneOf != CallerEdge->Caller)
4174 auto CallerEdgeContextsForAllocNew =
4176 if (CallerEdgeContextsForAllocNew.empty())
4186 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4196 if (CallerEdge->isRemoved())
4206 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4207 if (CallerEdgeContextsForAlloc.empty())
4212 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4213 CalleeEdgeAllocTypesForCallerEdge.clear();
4214 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4215 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4216 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4222 ContextNode *Clone =
nullptr;
4223 for (
auto *CurClone :
Node->Clones) {
4224 if (allocTypeToUse(CurClone->AllocTypes) !=
4225 allocTypeToUse(CallerAllocTypeForAlloc))
4232 assert(!BothSingleAlloc ||
4233 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4239 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4240 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4248 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4249 CallerEdgeContextsForAlloc);
4251 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4254 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4261 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4267void ModuleCallsiteContextGraph::updateAllocationCall(
4272 "memprof", AllocTypeString);
4275 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4276 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4278 <<
" marked with memprof allocation attribute "
4279 <<
ore::NV(
"Attribute", AllocTypeString));
4282void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4286 assert(AI->Versions.size() >
Call.cloneNo());
4291ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4293 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4294 return AllocationType::None;
4295 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4296 ? AllocationType::Cold
4297 : AllocationType::NotCold;
4301IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4303 assert(AI->Versions.size() >
Call.cloneNo());
4307void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4308 FuncInfo CalleeFunc) {
4309 auto *CurF = getCalleeFunc(CallerCall.call());
4310 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4317 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4319 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4321 MismatchedCloneAssignments++;
4324 if (NewCalleeCloneNo > 0)
4325 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4326 OREGetter(CallerCall.call()->getFunction())
4327 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4328 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4329 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4330 <<
" assigned to call function clone "
4331 <<
ore::NV(
"Callee", CalleeFunc.func()));
4334void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4335 FuncInfo CalleeFunc) {
4338 "Caller cannot be an allocation which should not have profiled calls");
4339 assert(CI->Clones.size() > CallerCall.cloneNo());
4340 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4341 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4346 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4348 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4350 MismatchedCloneAssignments++;
4352 CurCalleeCloneNo = NewCalleeCloneNo;
4364 SP->replaceLinkageName(MDName);
4368 TempDISubprogram NewDecl = Decl->
clone();
4369 NewDecl->replaceLinkageName(MDName);
4373CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4375ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4376 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4377 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4382 assert(!
Func.func()->getParent()->getFunction(Name));
4383 NewFunc->setName(Name);
4385 for (
auto &Inst : CallsWithMetadataInFunc) {
4387 assert(Inst.cloneNo() == 0);
4390 OREGetter(
Func.func())
4391 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4392 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4393 return {NewFunc, CloneNo};
4396CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4397 IndexCall>::FuncInfo
4398IndexCallsiteContextGraph::cloneFunctionForCallsite(
4399 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4400 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4414 for (
auto &Inst : CallsWithMetadataInFunc) {
4416 assert(Inst.cloneNo() == 0);
4418 assert(AI->Versions.size() == CloneNo);
4421 AI->Versions.push_back(0);
4424 assert(CI && CI->Clones.size() == CloneNo);
4427 CI->Clones.push_back(0);
4429 CallMap[Inst] = {Inst.call(), CloneNo};
4431 return {
Func.func(), CloneNo};
4448template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4449void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4455 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4456 for (
auto &Entry : AllocationCallToContextNodeMap) {
4458 for (
auto Id :
Node->getContextIds())
4459 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4460 for (
auto *Clone :
Node->Clones) {
4461 for (
auto Id : Clone->getContextIds())
4462 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4469 DenseSet<const ContextNode *> Visited;
4470 for (
auto &Entry : AllocationCallToContextNodeMap) {
4473 mergeClones(Node, Visited, ContextIdToAllocationNode);
4479 auto Clones =
Node->Clones;
4480 for (
auto *Clone : Clones)
4481 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4485 dbgs() <<
"CCG after merging:\n";
4489 exportToDot(
"aftermerge");
4497template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4498void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4499 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4500 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4510 bool FoundUnvisited =
true;
4512 while (FoundUnvisited) {
4514 FoundUnvisited =
false;
4517 auto CallerEdges =
Node->CallerEdges;
4518 for (
auto CallerEdge : CallerEdges) {
4520 if (CallerEdge->Callee != Node)
4525 FoundUnvisited =
true;
4526 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4530 TotalMergeInvokes++;
4531 TotalMergeIters += Iters;
4532 if (Iters > MaxMergeIters)
4533 MaxMergeIters = Iters;
4536 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4539template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4540void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4541 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4542 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4544 if (
Node->emptyContextIds())
4549 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4550 OrigNodeToCloneEdges;
4551 for (
const auto &
E :
Node->CalleeEdges) {
4556 OrigNodeToCloneEdges[
Base].push_back(
E);
4562 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4563 const std::shared_ptr<ContextEdge> &
B) {
4564 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4565 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4566 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4568 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4572 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4577 for (
auto Entry : OrigNodeToCloneEdges) {
4580 auto &CalleeEdges =
Entry.second;
4581 auto NumCalleeClones = CalleeEdges.size();
4583 if (NumCalleeClones == 1)
4594 DenseSet<ContextNode *> OtherCallersToShareMerge;
4595 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4596 OtherCallersToShareMerge);
4601 ContextNode *MergeNode =
nullptr;
4602 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4603 for (
auto CalleeEdge : CalleeEdges) {
4604 auto *OrigCallee = CalleeEdge->Callee;
4610 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4611 MergeNode = OrigCallee;
4612 NonNewMergedNodes++;
4619 if (!OtherCallersToShareMerge.
empty()) {
4620 bool MoveAllCallerEdges =
true;
4621 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4622 if (CalleeCallerE == CalleeEdge)
4624 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4625 MoveAllCallerEdges =
false;
4631 if (MoveAllCallerEdges) {
4632 MergeNode = OrigCallee;
4633 NonNewMergedNodes++;
4640 assert(MergeNode != OrigCallee);
4641 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4644 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4649 if (!OtherCallersToShareMerge.
empty()) {
4653 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4654 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4655 if (CalleeCallerE == CalleeEdge)
4657 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4659 CallerToMoveCount[CalleeCallerE->Caller]++;
4660 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4664 removeNoneTypeCalleeEdges(OrigCallee);
4665 removeNoneTypeCalleeEdges(MergeNode);
4683template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4684void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4685 findOtherCallersToShareMerge(
4687 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4688 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4689 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4690 auto NumCalleeClones = CalleeEdges.size();
4693 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4696 unsigned PossibleOtherCallerNodes = 0;
4700 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4706 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4707 for (
auto CalleeEdge : CalleeEdges) {
4708 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4711 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4712 if (CalleeCallerEdges->Caller == Node) {
4713 assert(CalleeCallerEdges == CalleeEdge);
4716 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4719 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4721 PossibleOtherCallerNodes++;
4725 for (
auto Id : CalleeEdge->getContextIds()) {
4726 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4730 MissingAllocForContextId++;
4733 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4740 for (
auto CalleeEdge : CalleeEdges) {
4741 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4743 if (!PossibleOtherCallerNodes)
4745 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4747 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4749 if (CalleeCallerE == CalleeEdge)
4753 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4758 for (
auto Id : CalleeCallerE->getContextIds()) {
4759 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4764 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4765 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4766 PossibleOtherCallerNodes--;
4773 if (!PossibleOtherCallerNodes)
4778 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4779 if (
Count != NumCalleeClones)
4781 OtherCallersToShareMerge.
insert(OtherCaller);
4826template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4827bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4834 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4838 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4839 const FuncInfo &CalleeFunc) {
4841 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4845 struct FuncCloneInfo {
4850 DenseMap<CallInfo, CallInfo> CallMap;
4878 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4879 UnassignedCallClones;
4883 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4884 FuncInfo OrigFunc(Func);
4889 std::vector<FuncCloneInfo> FuncCloneInfos;
4890 for (
auto &
Call : CallsWithMetadata) {
4891 ContextNode *
Node = getNodeForInst(
Call);
4895 if (!Node ||
Node->Clones.empty())
4898 "Not having a call should have prevented cloning");
4902 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4906 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4908 ContextNode *CallsiteClone,
4911 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4913 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4914 DenseMap<CallInfo, CallInfo> &CallMap =
4915 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4916 CallInfo CallClone(
Call);
4917 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4918 CallClone = It->second;
4919 CallsiteClone->setCall(CallClone);
4921 for (
auto &MatchingCall :
Node->MatchingCalls) {
4922 CallInfo CallClone(MatchingCall);
4923 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4924 CallClone = It->second;
4926 MatchingCall = CallClone;
4934 auto MoveEdgeToNewCalleeCloneAndSetUp =
4935 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4936 ContextNode *OrigCallee =
Edge->Callee;
4937 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4938 removeNoneTypeCalleeEdges(NewClone);
4939 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4943 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4944 RecordCalleeFuncOfCallsite(
4945 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4952 std::deque<ContextNode *> ClonesWorklist;
4954 if (!
Node->emptyContextIds())
4955 ClonesWorklist.push_back(Node);
4961 unsigned NodeCloneCount = 0;
4962 while (!ClonesWorklist.empty()) {
4963 ContextNode *Clone = ClonesWorklist.front();
4964 ClonesWorklist.pop_front();
4973 if (FuncCloneInfos.size() < NodeCloneCount) {
4975 if (NodeCloneCount == 1) {
4980 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4981 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4985 FuncCloneInfos.push_back(
4986 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4987 AssignCallsiteCloneToFuncClone(
4988 OrigFunc,
Call, Clone,
4989 AllocationCallToContextNodeMap.count(
Call));
4990 for (
auto &CE : Clone->CallerEdges) {
4992 if (!
CE->Caller->hasCall())
4994 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
5004 FuncInfo PreviousAssignedFuncClone;
5006 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
5007 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
5009 bool CallerAssignedToCloneOfFunc =
false;
5010 if (EI != Clone->CallerEdges.end()) {
5011 const std::shared_ptr<ContextEdge> &
Edge = *EI;
5012 PreviousAssignedFuncClone =
5013 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5014 CallerAssignedToCloneOfFunc =
true;
5019 DenseMap<CallInfo, CallInfo> NewCallMap;
5020 unsigned CloneNo = FuncCloneInfos.size();
5021 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
5022 "should already exist in the map");
5023 FuncInfo NewFuncClone = cloneFunctionForCallsite(
5024 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
5025 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
5026 FunctionClonesAnalysis++;
5032 if (!CallerAssignedToCloneOfFunc) {
5033 AssignCallsiteCloneToFuncClone(
5034 NewFuncClone,
Call, Clone,
5035 AllocationCallToContextNodeMap.count(
Call));
5036 for (
auto &CE : Clone->CallerEdges) {
5038 if (!
CE->Caller->hasCall())
5040 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5052 auto CallerEdges = Clone->CallerEdges;
5053 for (
auto CE : CallerEdges) {
5055 if (
CE->isRemoved()) {
5061 if (!
CE->Caller->hasCall())
5064 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5068 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5069 PreviousAssignedFuncClone)
5072 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5085 auto CalleeEdges =
CE->Caller->CalleeEdges;
5086 for (
auto CalleeEdge : CalleeEdges) {
5089 if (CalleeEdge->isRemoved()) {
5094 ContextNode *
Callee = CalleeEdge->Callee;
5098 if (Callee == Clone || !
Callee->hasCall())
5103 if (Callee == CalleeEdge->Caller)
5105 ContextNode *NewClone =
5106 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5109 removeNoneTypeCalleeEdges(Callee);
5117 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5118 OrigCall.setCloneNo(0);
5119 DenseMap<CallInfo, CallInfo> &CallMap =
5120 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5122 CallInfo NewCall(CallMap[OrigCall]);
5124 NewClone->setCall(NewCall);
5126 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5127 CallInfo OrigMatchingCall(MatchingCall);
5128 OrigMatchingCall.setCloneNo(0);
5130 CallInfo NewCall(CallMap[OrigMatchingCall]);
5133 MatchingCall = NewCall;
5142 auto FindFirstAvailFuncClone = [&]() {
5147 for (
auto &CF : FuncCloneInfos) {
5148 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5149 return CF.FuncClone;
5152 "Expected an available func clone for this callsite clone");
5169 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5170 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5174 auto CloneCallerEdges = Clone->CallerEdges;
5175 for (
auto &
Edge : CloneCallerEdges) {
5179 if (
Edge->isRemoved())
5182 if (!
Edge->Caller->hasCall())
5186 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5187 FuncInfo FuncCloneCalledByCaller =
5188 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5198 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5199 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5207 (FuncCloneAssignedToCurCallsiteClone &&
5208 FuncCloneAssignedToCurCallsiteClone !=
5209 FuncCloneCalledByCaller)) {
5224 if (FuncCloneToNewCallsiteCloneMap.count(
5225 FuncCloneCalledByCaller)) {
5226 ContextNode *NewClone =
5227 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5228 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5230 removeNoneTypeCalleeEdges(NewClone);
5233 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5234 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5237 ClonesWorklist.push_back(NewClone);
5241 removeNoneTypeCalleeEdges(Clone);
5249 if (!FuncCloneAssignedToCurCallsiteClone) {
5250 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5252 AssignCallsiteCloneToFuncClone(
5253 FuncCloneCalledByCaller,
Call, Clone,
5254 AllocationCallToContextNodeMap.count(
Call));
5258 assert(FuncCloneAssignedToCurCallsiteClone ==
5259 FuncCloneCalledByCaller);
5268 if (!FuncCloneAssignedToCurCallsiteClone) {
5269 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5270 assert(FuncCloneAssignedToCurCallsiteClone);
5272 AssignCallsiteCloneToFuncClone(
5273 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5274 AllocationCallToContextNodeMap.count(
Call));
5276 assert(FuncCloneToCurNodeCloneMap
5277 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5279 RecordCalleeFuncOfCallsite(
Edge->Caller,
5280 FuncCloneAssignedToCurCallsiteClone);
5300 if (!FuncCloneAssignedToCurCallsiteClone) {
5301 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5302 assert(FuncCloneAssignedToCurCallsiteClone &&
5303 "No available func clone for this callsite clone");
5304 AssignCallsiteCloneToFuncClone(
5305 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5306 AllocationCallToContextNodeMap.contains(
Call));
5311 for (
const auto &PE :
Node->CalleeEdges)
5313 for (
const auto &CE :
Node->CallerEdges)
5315 for (
auto *Clone :
Node->Clones) {
5317 for (
const auto &PE : Clone->CalleeEdges)
5319 for (
const auto &CE : Clone->CallerEdges)
5325 if (FuncCloneInfos.size() < 2)
5331 for (
auto &
Call : CallsWithMetadata) {
5332 ContextNode *
Node = getNodeForInst(
Call);
5333 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5339 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5343 DenseSet<unsigned> NodeCallClones;
5344 for (
auto *
C :
Node->Clones)
5345 NodeCallClones.
insert(
C->Call.cloneNo());
5348 for (
auto &FC : FuncCloneInfos) {
5353 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5358 auto &CallVector = UnassignedCallClones[
Node][
I];
5359 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5360 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5361 CallInfo CallClone = It->second;
5362 CallVector.push_back(CallClone);
5366 assert(
false &&
"Expected to find call in CallMap");
5369 for (
auto &MatchingCall :
Node->MatchingCalls) {
5370 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5371 CallInfo CallClone = It->second;
5372 CallVector.push_back(CallClone);
5376 assert(
false &&
"Expected to find call in CallMap");
5384 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5386 auto UpdateCalls = [&](ContextNode *
Node,
5387 DenseSet<const ContextNode *> &Visited,
5388 auto &&UpdateCalls) {
5389 auto Inserted = Visited.insert(Node);
5393 for (
auto *Clone :
Node->Clones)
5394 UpdateCalls(Clone, Visited, UpdateCalls);
5396 for (
auto &
Edge :
Node->CallerEdges)
5397 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5401 if (!
Node->hasCall() ||
Node->emptyContextIds())
5404 if (
Node->IsAllocation) {
5405 auto AT = allocTypeToUse(
Node->AllocTypes);
5411 !ContextIdToContextSizeInfos.empty()) {
5412 uint64_t TotalCold = 0;
5414 for (
auto Id :
Node->getContextIds()) {
5415 auto TypeI = ContextIdToAllocationType.find(Id);
5416 assert(TypeI != ContextIdToAllocationType.end());
5417 auto CSI = ContextIdToContextSizeInfos.find(Id);
5418 if (CSI != ContextIdToContextSizeInfos.end()) {
5419 for (
auto &
Info : CSI->second) {
5421 if (TypeI->second == AllocationType::Cold)
5422 TotalCold +=
Info.TotalSize;
5427 AT = AllocationType::Cold;
5429 updateAllocationCall(
Node->Call, AT);
5434 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5437 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5438 updateCall(
Node->Call, CalleeFunc);
5440 for (
auto &
Call :
Node->MatchingCalls)
5441 updateCall(
Call, CalleeFunc);
5445 if (!UnassignedCallClones.
contains(Node))
5447 DenseSet<unsigned> NodeCallClones;
5448 for (
auto *
C :
Node->Clones)
5449 NodeCallClones.
insert(
C->Call.cloneNo());
5451 auto &ClonedCalls = UnassignedCallClones[
Node];
5452 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5456 if (NodeCallClones.
contains(CloneNo))
5459 for (
auto &
Call : CallVector)
5460 updateCall(
Call, CalleeFunc);
5469 DenseSet<const ContextNode *> Visited;
5470 for (
auto &Entry : AllocationCallToContextNodeMap)
5471 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5482 for (
auto &SN : FS->callsites()) {
5487 SN.Clones.size() >
I &&
5488 "Callsite summary has fewer entries than other summaries in function");
5489 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5496 for (
auto &AN : FS->allocs()) {
5500 assert(AN.Versions.size() >
I &&
5501 "Alloc summary has fewer entries than other summaries in function");
5502 if (AN.Versions.size() <=
I ||
5519 NewGV->takeName(DeclGV);
5526 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5527 if (!FuncToAliasMap.count(&
F))
5529 for (
auto *
A : FuncToAliasMap[&
F]) {
5531 auto *PrevA = M.getNamedAlias(AliasName);
5533 A->getType()->getPointerAddressSpace(),
5534 A->getLinkage(), AliasName, NewF);
5535 NewA->copyAttributesFrom(
A);
5537 TakeDeclNameAndReplace(PrevA, NewA);
5546 FunctionsClonedThinBackend++;
5563 for (
unsigned I = 1;
I < NumClones;
I++) {
5564 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5571 FunctionCloneDuplicatesThinBackend++;
5572 auto *Func = HashToFunc[Hash];
5573 if (Func->hasAvailableExternallyLinkage()) {
5579 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5581 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5584 auto *PrevF = M.getFunction(Name);
5587 TakeDeclNameAndReplace(PrevF, Alias);
5589 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5592 CloneFuncAliases(Func,
I);
5596 HashToFunc[Hash] = NewF;
5597 FunctionClonesThinBackend++;
5600 for (
auto &BB : *NewF) {
5601 for (
auto &Inst : BB) {
5602 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5603 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5608 TakeDeclNameAndReplace(PrevF, NewF);
5610 NewF->setName(Name);
5613 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5616 CloneFuncAliases(NewF,
I);
5625 const Function *CallingFunc =
nullptr) {
5644 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5650 if (!SrcFileMD &&
F.isDeclaration()) {
5654 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5659 assert(SrcFileMD || OrigName ==
F.getName());
5661 StringRef SrcFile = M.getSourceFileName();
5673 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5674 F.getName().contains(
'.')) {
5675 OrigName =
F.getName().rsplit(
'.').first;
5684 assert(TheFnVI ||
F.isDeclaration());
5688bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5690 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5691 Symtab = std::make_unique<InstrProfSymtab>();
5702 if (
Error E = Symtab->create(M,
true,
false)) {
5703 std::string SymtabFailure =
toString(std::move(
E));
5704 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5717 auto MIBIter = AllocNode.
MIBs.begin();
5718 for (
auto &MDOp : MemProfMD->
operands()) {
5720 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5725 auto ContextIterBegin =
5729 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5731 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5736 if (LastStackContextId == *ContextIter)
5738 LastStackContextId = *ContextIter;
5739 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5749bool MemProfContextDisambiguation::applyImport(
Module &M) {
5756 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5758 for (
auto &
A :
M.aliases()) {
5759 auto *Aliasee =
A.getAliaseeObject();
5761 FuncToAliasMap[
F].insert(&
A);
5764 if (!initializeIndirectCallPromotionInfo(M))
5771 OptimizationRemarkEmitter ORE(&
F);
5774 bool ClonesCreated =
false;
5775 unsigned NumClonesCreated = 0;
5776 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5786 if (ClonesCreated) {
5787 assert(NumClonesCreated == NumClones);
5794 ClonesCreated =
true;
5795 NumClonesCreated = NumClones;
5798 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5799 Function *CalledFunction, FunctionSummary *
FS) {
5801 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5813 if (CalledFunction != CB->getCalledOperand() &&
5814 (!GA || CalledFunction != GA->getAliaseeObject())) {
5815 SkippedCallsCloning++;
5821 auto CalleeOrigName = CalledFunction->getName();
5822 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5825 if (J > 0 && VMaps[J - 1]->
empty())
5829 if (!StackNode.
Clones[J])
5831 auto NewF =
M.getOrInsertFunction(
5833 CalledFunction->getFunctionType());
5847 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5848 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5850 <<
" assigned to call function clone "
5851 <<
ore::NV(
"Callee", NewF.getCallee()));
5865 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5869 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5871 "enable-import-metadata is needed to emit thinlto_src_module");
5872 StringRef SrcModule =
5875 if (GVS->modulePath() == SrcModule) {
5876 GVSummary = GVS.get();
5880 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5890 if (
FS->allocs().empty() &&
FS->callsites().empty())
5893 auto SI =
FS->callsites().begin();
5894 auto AI =
FS->allocs().begin();
5899 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5902 for (
auto CallsiteIt =
FS->callsites().rbegin();
5903 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5904 auto &Callsite = *CallsiteIt;
5908 if (!Callsite.StackIdIndices.empty())
5910 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5919 for (
auto &BB :
F) {
5920 for (
auto &
I : BB) {
5926 auto *CalledValue = CB->getCalledOperand();
5927 auto *CalledFunction = CB->getCalledFunction();
5928 if (CalledValue && !CalledFunction) {
5929 CalledValue = CalledValue->stripPointerCasts();
5936 assert(!CalledFunction &&
5937 "Expected null called function in callsite for alias");
5941 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5942 I.getMetadata(LLVMContext::MD_callsite));
5943 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5949 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5950 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5951 ? AllocTypeColdThinBackend++
5952 : AllocTypeNotColdThinBackend++;
5953 OrigAllocsThinBackend++;
5954 AllocVersionsThinBackend++;
5955 if (!MaxAllocVersionsThinBackend)
5956 MaxAllocVersionsThinBackend = 1;
5963 auto &AllocNode = *(AI++);
5971 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5973 OrigAllocsThinBackend++;
5974 AllocVersionsThinBackend += AllocNode.Versions.size();
5975 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5976 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5986 if (AllocNode.Versions.size() == 1 &&
5989 AllocationType::NotCold ||
5991 AllocationType::None);
5992 UnclonableAllocsThinBackend++;
5998 return Type == ((uint8_t)AllocationType::NotCold |
5999 (uint8_t)AllocationType::Cold);
6003 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
6006 if (J > 0 && VMaps[J - 1]->
empty())
6009 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
6012 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
6013 : AllocTypeNotColdThinBackend++;
6028 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
6029 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
6031 <<
" marked with memprof allocation attribute "
6032 <<
ore::NV(
"Attribute", AllocTypeString));
6034 }
else if (!CallsiteContext.empty()) {
6035 if (!CalledFunction) {
6039 assert(!CI || !CI->isInlineAsm());
6049 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6055 CloneFuncIfNeeded(NumClones, FS);
6060 assert(SI !=
FS->callsites().end());
6061 auto &StackNode = *(
SI++);
6067 for (
auto StackId : CallsiteContext) {
6069 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6075 CloneCallsite(StackNode, CB, CalledFunction, FS);
6077 }
else if (CB->isTailCall() && CalledFunction) {
6080 ValueInfo CalleeVI =
6082 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6083 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6084 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6085 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6092 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6102 for (
auto &BB :
F) {
6103 for (
auto &
I : BB) {
6106 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6107 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6115unsigned MemProfContextDisambiguation::recordICPInfo(
6120 uint32_t NumCandidates;
6121 uint64_t TotalCount;
6122 auto CandidateProfileData =
6123 ICallAnalysis->getPromotionCandidatesForInstruction(
6125 if (CandidateProfileData.empty())
6131 bool ICPNeeded =
false;
6132 unsigned NumClones = 0;
6133 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6134 for (
const auto &Candidate : CandidateProfileData) {
6136 auto CalleeValueInfo =
6138 ImportSummary->getValueInfo(Candidate.Value);
6141 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6143 auto &StackNode = *(
SI++);
6148 [](
unsigned CloneNo) { return CloneNo != 0; });
6158 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6159 TotalCount, CallsiteInfoStartIndex});
6163void MemProfContextDisambiguation::performICP(
6165 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6167 OptimizationRemarkEmitter &ORE) {
6174 for (
auto &
Info : ICallAnalysisInfo) {
6177 auto TotalCount =
Info.TotalCount;
6178 unsigned NumPromoted = 0;
6179 unsigned NumClones = 0;
6181 for (
auto &Candidate :
Info.CandidateProfileData) {
6192 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6193 if (TargetFunction ==
nullptr ||
6201 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6202 <<
"Memprof cannot promote indirect call: target with md5sum "
6203 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6212 const char *Reason =
nullptr;
6215 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6216 <<
"Memprof cannot promote indirect call to "
6217 <<
ore::NV(
"TargetFunction", TargetFunction)
6218 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6229 CallBase *CBClone = CB;
6230 for (
unsigned J = 0; J < NumClones; J++) {
6233 if (J > 0 && VMaps[J - 1]->
empty())
6243 TotalCount, isSamplePGO, &ORE);
6244 auto *TargetToUse = TargetFunction;
6247 if (StackNode.
Clones[J]) {
6266 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6268 <<
" promoted and assigned to call function clone "
6269 <<
ore::NV(
"Callee", TargetToUse));
6273 TotalCount -= Candidate.Count;
6278 CallBase *CBClone = CB;
6279 for (
unsigned J = 0; J < NumClones; J++) {
6282 if (J > 0 && VMaps[J - 1]->
empty())
6288 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6291 if (TotalCount != 0)
6293 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6294 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6299template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6300bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6302 dbgs() <<
"CCG before cloning:\n";
6306 exportToDot(
"postbuild");
6319 dbgs() <<
"CCG after cloning:\n";
6323 exportToDot(
"cloned");
6325 bool Changed = assignFunctions();
6328 dbgs() <<
"CCG after assigning function clones:\n";
6332 exportToDot(
"clonefuncassign");
6335 printTotalSizes(
errs());
6340bool MemProfContextDisambiguation::processModule(
6342 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6347 return applyImport(M);
6360 ModuleCallsiteContextGraph CCG(M, OREGetter);
6361 return CCG.process();
6366 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6371 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6375 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6379 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6380 "-memprof-dot-context-id");
6381 if (ImportSummary) {
6391 auto ReadSummaryFile =
6393 if (!ReadSummaryFile) {
6400 if (!ImportSummaryForTestingOrErr) {
6406 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6407 ImportSummary = ImportSummaryForTesting.get();
6416 if (!processModule(M, OREGetter))
6433 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6449 for (
auto &BB :
F) {
6450 for (
auto &
I : BB) {
6454 if (CI->hasFnAttr(
"memprof")) {
6455 CI->removeFnAttr(
"memprof");
6458 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6459 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6465 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6466 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations of the classes that hold the module index and summary for function importing.
if (auto Err = PB.parsePassPipeline(MPM, Passes)) return wrap(std::move(Err)); MPM.run(*Mod, MAM)
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on sets of different types, but with the same element types.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics of things like compile time or the number of times a function is executed.
#define STATISTIC(VARNAME, DESC)
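A minimal sketch of how the STATISTIC macro is used: it requires a DEBUG_TYPE in the translation unit and defines an llvm::Statistic counter reported under -stats. The pass and counter names below are hypothetical.

#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "my-pass" // hypothetical pass name

STATISTIC(NumThingsProcessed, "Number of things processed"); // hypothetical

void noteThingProcessed() {
  ++NumThingsProcessed; // counters behave like integers
}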
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
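An illustrative (hypothetical) use of the DenseMap members listed above:

#include "llvm/ADT/DenseMap.h"
#include <cassert>

void denseMapDemo() {
  llvm::DenseMap<unsigned, unsigned> Map;
  Map.reserve(16); // pre-size to avoid growth while inserting
  auto [It, Inserted] = Map.insert({1, 10});
  assert(Inserted && It->second == 10);
  assert(Map.count(1) == 1 && Map.contains(1));
  assert(Map.lookup(2) == 0); // default-constructed value when absent
  if (auto F = Map.find(1); F != Map.end())
    F->second += 5;
}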
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified module.
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e.g. profile or ThinLTO).
bool isWeakForLinker() const
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
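A hedged sketch (not the pass's own code) showing the metadata APIs above: read an attached node, clone it as a temporary, and re-unique the clone before attaching it elsewhere.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"

void copyMD(llvm::Instruction &From, llvm::Instruction &To, unsigned KindID) {
  if (llvm::MDNode *MD = From.getMetadata(KindID)) {
    llvm::TempMDNode Tmp = MD->clone(); // temporary, not yet uniqued
    // replaceWithUniqued re-inserts the clone into the context's
    // uniquing tables and returns the uniqued node.
    To.setMetadata(KindID, llvm::MDNode::replaceWithUniqued(std::move(Tmp)));
  }
}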
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
Class to hold module path string table and global value map, and encapsulate methods for operating on them.
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to the set.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the pointer.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
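A quick illustration of the small-size-optimized containers above; element types and sizes are arbitrary.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

void smallContainers(int *A, int *B) {
  llvm::SmallPtrSet<int *, 4> Seen; // no heap allocation up to 4 elements
  Seen.insert(A);
  bool WasNew = Seen.insert(B).second; // false if B was already present

  llvm::SmallVector<int, 8> Vals;
  Vals.reserve(8);
  Vals.push_back(1);
  Vals.emplace_back(2); // constructs in place
  (void)WasNew;
}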
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
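An illustrative (hypothetical) use of the DenseSet members listed above:

#include "llvm/ADT/DenseSet.h"
#include <cassert>

void denseSetDemo() {
  llvm::DenseSet<unsigned> S, T;
  S.reserve(8);
  S.insert(1);
  unsigned More[] = {2, 3};
  S.insert_range(More);
  assert(S.contains(2) && S.count(3) == 1);
  S.erase(1);
  S.swap(T); // S is now empty; T holds {2, 3}
}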
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresponding ThinLTO summary data structures (CallsiteInfo and MIBInfo).
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all).
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
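A hedged sketch of walking !memprof metadata with the helpers above: each operand of the !memprof node is an MIB node carrying a stack node and an allocation type. Error handling is elided.

#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

void inspectAllocTypes(llvm::CallBase *CB) {
  llvm::MDNode *MemProfMD = CB->getMetadata(llvm::LLVMContext::MD_memprof);
  if (!MemProfMD)
    return;
  for (const llvm::MDOperand &Op : MemProfMD->operands()) {
    auto *MIB = llvm::dyn_cast<llvm::MDNode>(Op);
    if (!MIB)
      continue;
    llvm::MDNode *StackNode = llvm::memprof::getMIBStackNode(MIB);
    llvm::memprof::AllocationType AT = llvm::memprof::getMIBAllocType(MIB);
    // getAllocTypeAttributeString(AT) yields the attribute spelling,
    // e.g. "cold" or "notcold".
    (void)StackNode;
    (void)AT;
  }
}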
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
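A brief illustration of the enumerate and drop_begin range helpers referenced above; the contents are arbitrary.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

int sumWithIndices() {
  llvm::SmallVector<int, 4> V = {10, 20, 30};
  int Sum = 0;
  for (auto [Idx, Val] : llvm::enumerate(V)) // yields (0,10), (1,20), (2,30)
    Sum += Idx * Val;
  for (int X : llvm::drop_begin(V)) // skips the first element: 20, 30
    Sum += X;
  return Sum;
}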
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>'s.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summary analysis and the ThinLTO backend processing.
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
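A hedged sketch of loading a summary index from a bitcode file using MemoryBuffer::getFile and getModuleSummaryIndex (both listed in this section); the path handling is hypothetical and error handling is simplified.

#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

std::unique_ptr<llvm::ModuleSummaryIndex> loadIndex(const char *Path) {
  auto BufOr = llvm::MemoryBuffer::getFile(Path);
  if (!BufOr)
    return nullptr; // BufOr.getError() holds the std::error_code
  auto IndexOr = llvm::getModuleSummaryIndex((*BufOr)->getMemBufferRef());
  if (!IndexOr) {
    llvm::logAllUnhandledErrors(IndexOr.takeError(), llvm::errs());
    return nullptr;
  }
  return std::move(*IndexOr);
}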
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst with the value profile metadata.
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
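A sketch of the generic set operations above applied to DenseSet; the set contents are arbitrary.

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetOperations.h"

void setOps(llvm::DenseSet<unsigned> &A, const llvm::DenseSet<unsigned> &B) {
  bool Changed = llvm::set_union(A, B);      // A := A u B
  llvm::set_intersect(A, B);                 // A := A ^ B
  llvm::set_subtract(A, B);                  // A := A - B
  bool Subset = llvm::set_is_subset(A, B);   // A in B?
  bool Overlap = llvm::set_intersects(A, B); // A ^ B non-empty?
  auto Inter = llvm::set_intersection(A, B); // returns A ^ B
  auto Diff = llvm::set_difference(A, B);    // returns A - B
  (void)Changed; (void)Subset; (void)Overlap; (void)Inter; (void)Diff;
}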
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
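A minimal illustration of the casting utilities above on IR values:

#include "llvm/IR/Instructions.h"

void castDemo(llvm::Value *V) {
  if (llvm::isa<llvm::CallInst>(V))                    // type check only
    llvm::cast<llvm::CallInst>(V)->getCalledOperand(); // asserted cast
  // dyn_cast returns null on a type mismatch; dyn_cast_if_present
  // additionally tolerates a null input.
  if (auto *CB = llvm::dyn_cast<llvm::CallBase>(V))
    (void)CB;
}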
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
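A sketch of the range-based predicate helpers above; the predicates and sentinel value are arbitrary.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

bool rangePredicates(const llvm::SmallVector<int, 4> &V) {
  bool AnyNeg = llvm::any_of(V, [](int X) { return X < 0; });
  bool NoZeros = llvm::none_of(V, [](int X) { return X == 0; });
  auto It = llvm::find_if(V, [](int X) { return X > 10; });
  return AnyNeg && NoZeros && It != V.end() && llvm::is_contained(V, 42);
}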
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
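A hedged sketch of CloneFunction together with the ValueToValueMapTy alias listed below; the suffix-based renaming is hypothetical.

#include "llvm/ADT/Twine.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"

llvm::Function *cloneWithSuffix(llvm::Function &F, const llvm::Twine &Suffix) {
  llvm::ValueToValueMapTy VMap; // filled with old-value -> clone mappings
  llvm::Function *NewF = llvm::CloneFunction(&F, VMap);
  NewF->setName(F.getName() + Suffix);
  return NewF; // CloneFunction already inserted NewF into F's module
}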
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given value type T.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automatically convertible to built-in pointer types.