49#include <unordered_map>
54#define DEBUG_TYPE "memprof-context-disambiguation"
57 "Number of function clones created during whole program analysis");
59 "Number of function clones created during ThinLTO backend");
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
67 "Number of not cold static allocations (possibly cloned) during "
69STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
81 "Number of unclonable ambigous allocations during ThinLTO backend");
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
85 "Number of profiled callees found via tail calls");
87 "Aggregate depth of profiled callees found via tail calls");
89 "Maximum depth of profiled callees found via tail calls");
91 "Number of profiled callees found via multiple tail call chains");
96 cl::desc(
"Specify the path prefix of the MemProf dot files."));
100 cl::desc(
"Export graph to dot files."));
104 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
108 cl::desc(
"Perform verification checks on CallingContextGraph."));
112 cl::desc(
"Perform frequent verification checks on nodes."));
115 "memprof-import-summary",
116 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
122 cl::desc(
"Max depth to recursively search for missing "
123 "frames through tail calls."));
134 cl::desc(
"Linking with hot/cold operator new interfaces"));
139 "Require target function definition when promoting indirect calls"));
160template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
161class CallsiteContextGraph {
163 CallsiteContextGraph() =
default;
164 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
165 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
172 void identifyClones();
179 bool assignFunctions();
186 const CallsiteContextGraph &CCG) {
192 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
194 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
196 void exportToDot(std::string Label)
const;
199 struct FuncInfo final
200 :
public std::pair<FuncTy *, unsigned > {
201 using Base = std::pair<FuncTy *, unsigned>;
202 FuncInfo(
const Base &
B) :
Base(
B) {}
203 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
204 explicit operator bool()
const {
return this->first !=
nullptr; }
205 FuncTy *
func()
const {
return this->first; }
206 unsigned cloneNo()
const {
return this->second; }
210 struct CallInfo final :
public std::pair<CallTy, unsigned > {
211 using Base = std::pair<CallTy, unsigned>;
213 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
215 explicit operator bool()
const {
return (
bool)this->first; }
216 CallTy call()
const {
return this->first; }
217 unsigned cloneNo()
const {
return this->second; }
218 void setCloneNo(
unsigned N) { this->second =
N; }
220 if (!
operator bool()) {
226 OS <<
"\t(clone " << cloneNo() <<
")";
249 bool Recursive =
false;
276 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
280 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
284 const std::vector<std::shared_ptr<ContextEdge>> *
285 getEdgesWithAllocInfo()
const {
288 if (!CalleeEdges.empty())
290 if (!CallerEdges.empty()) {
303 auto *Edges = getEdgesWithAllocInfo();
307 for (
auto &Edge : *Edges)
308 Count += Edge->getContextIds().size();
310 for (
auto &Edge : *Edges)
311 ContextIds.
insert(Edge->getContextIds().begin(),
312 Edge->getContextIds().end());
318 uint8_t computeAllocType()
const {
319 auto *Edges = getEdgesWithAllocInfo();
321 return (
uint8_t)AllocationType::None;
325 for (
auto &Edge : *Edges) {
336 bool emptyContextIds()
const {
337 auto *Edges = getEdgesWithAllocInfo();
340 for (
auto &Edge : *Edges) {
341 if (!Edge->getContextIds().empty())
348 std::vector<ContextNode *> Clones;
351 ContextNode *CloneOf =
nullptr;
353 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
355 ContextNode(
bool IsAllocation,
CallInfo C)
356 : IsAllocation(IsAllocation),
Call(
C) {}
358 void addClone(ContextNode *Clone) {
360 CloneOf->Clones.push_back(Clone);
361 Clone->CloneOf = CloneOf;
363 Clones.push_back(Clone);
365 Clone->CloneOf =
this;
369 ContextNode *getOrigNode() {
376 unsigned int ContextId);
378 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
379 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
380 void eraseCalleeEdge(
const ContextEdge *Edge);
381 void eraseCallerEdge(
const ContextEdge *Edge);
385 bool hasCall()
const {
return (
bool)
Call.call(); }
391 bool isRemoved()
const {
394 return AllocTypes == (
uint8_t)AllocationType::None;
422 ContextIds(
std::
move(ContextIds)) {}
428 inline void clear() {
430 AllocTypes = (
uint8_t)AllocationType::None;
438 inline bool isRemoved()
const {
439 if (Callee || Caller)
460 void removeNoneTypeCalleeEdges(ContextNode *
Node);
461 void removeNoneTypeCallerEdges(ContextNode *
Node);
463 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
469 template <
class NodeT,
class IteratorT>
470 std::vector<uint64_t>
475 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
478 template <
class NodeT,
class IteratorT>
479 void addStackNodesForMIB(ContextNode *AllocNode,
488 void updateStackNodes();
492 void handleCallsitesWithMultipleTargets();
498 bool partitionCallsByCallee(
500 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
507 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
510 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
515 struct CallContextInfo {
519 std::vector<uint64_t> StackIds;
533 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
534 bool CalleeIter =
true);
542 void assignStackNodesPostOrder(
555 void propagateDuplicateContextIds(
561 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
569 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
579 calleesMatch(CallTy Call, EdgeIter &EI,
584 const FuncTy *getCalleeFunc(CallTy Call) {
585 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(Call);
591 bool calleeMatchesFunc(
592 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
593 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
594 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
595 Call, Func, CallerFunc, FoundCalleeChain);
599 bool sameCallee(CallTy Call1, CallTy Call2) {
600 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
605 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
606 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
611 uint64_t getLastStackId(CallTy Call) {
612 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
617 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
618 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
623 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(Call);
628 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
629 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
635 FuncInfo cloneFunctionForCallsite(
636 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
637 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
638 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
639 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
644 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
645 unsigned CloneNo)
const {
646 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
650 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
652 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
653 auto *NewNode = NodeOwner.back().get();
655 NodeToCallingFunc[NewNode] =
F;
660 ContextNode *getNodeForInst(
const CallInfo &
C);
661 ContextNode *getNodeForAlloc(
const CallInfo &
C);
662 ContextNode *getNodeForStackId(
uint64_t StackId);
685 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
686 EdgeIter *CallerEdgeI =
nullptr,
694 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
695 ContextNode *NewCallee,
696 EdgeIter *CallerEdgeI =
nullptr,
697 bool NewClone =
false,
706 void moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller);
735 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
741 unsigned int LastContextId = 0;
744template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
746 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
747template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
749 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
750template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
752 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
753template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
755 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
758class ModuleCallsiteContextGraph
759 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
762 ModuleCallsiteContextGraph(
767 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
772 bool calleeMatchesFunc(
774 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
776 bool findProfiledCalleeThroughTailCalls(
778 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
779 bool &FoundMultipleCalleeChains);
781 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
784 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
785 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
787 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
788 std::map<CallInfo, CallInfo> &CallMap,
789 std::vector<CallInfo> &CallsWithMetadataInFunc,
792 unsigned CloneNo)
const;
801struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
803 IndexCall(std::nullptr_t) : IndexCall() {}
808 IndexCall *operator->() {
return this; }
813 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
816 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
824class IndexCallsiteContextGraph
825 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
828 IndexCallsiteContextGraph(
833 ~IndexCallsiteContextGraph() {
838 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
840 for (
auto &Callsite :
I.second)
841 FS->addCallsite(*Callsite.second);
846 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
851 bool calleeMatchesFunc(
854 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
855 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
856 bool findProfiledCalleeThroughTailCalls(
858 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
859 bool &FoundMultipleCalleeChains);
860 uint64_t getLastStackId(IndexCall &Call);
861 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
864 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
867 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
868 std::map<CallInfo, CallInfo> &CallMap,
869 std::vector<CallInfo> &CallsWithMetadataInFunc,
871 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
872 unsigned CloneNo)
const;
876 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
887 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
888 FunctionCalleesToSynthesizedCallsiteInfos;
900 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
903 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
916 ((
uint8_t)AllocationType::NotCold | (
uint8_t)AllocationType::Cold))
917 return AllocationType::NotCold;
925template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
927 const std::vector<uint8_t> &InAllocTypes,
928 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
932 assert(InAllocTypes.size() == Edges.size());
934 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
936 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
940 if (l == (uint8_t)AllocationType::None ||
941 r->AllocTypes == (uint8_t)AllocationType::None)
943 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
952template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953bool allocTypesMatchClone(
954 const std::vector<uint8_t> &InAllocTypes,
955 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
956 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
960 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
964 for (
const auto &E : Clone->CalleeEdges) {
966 EdgeCalleeMap[E->Callee] = E->AllocTypes;
970 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
971 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
973 if (Iter == EdgeCalleeMap.
end())
978 if (InAllocTypes[
I] == (
uint8_t)AllocationType::None ||
979 Iter->second == (
uint8_t)AllocationType::None)
981 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
989template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
990typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
991CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
993 ContextNode *
Node = getNodeForAlloc(
C);
997 return NonAllocationCallToContextNodeMap.lookup(
C);
1000template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1001typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1002CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1004 return AllocationCallToContextNodeMap.lookup(
C);
1007template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1008typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1009CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1011 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1012 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1013 return StackEntryNode->second;
1017template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1018void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1020 unsigned int ContextId) {
1021 for (
auto &Edge : CallerEdges) {
1022 if (Edge->Caller == Caller) {
1024 Edge->getContextIds().insert(ContextId);
1028 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
1030 CallerEdges.push_back(Edge);
1031 Caller->CalleeEdges.push_back(Edge);
1034template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1035void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1036 ContextEdge *Edge, EdgeIter *EI,
bool CalleeIter) {
1037 assert(!EI || (*EI)->get() == Edge);
1042 auto *
Callee = Edge->Callee;
1043 auto *
Caller = Edge->Caller;
1051 Callee->eraseCallerEdge(Edge);
1052 Caller->eraseCalleeEdge(Edge);
1053 }
else if (CalleeIter) {
1054 Callee->eraseCallerEdge(Edge);
1055 *EI =
Caller->CalleeEdges.erase(*EI);
1057 Caller->eraseCalleeEdge(Edge);
1058 *EI =
Callee->CallerEdges.erase(*EI);
1062template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1063void CallsiteContextGraph<
1064 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
1065 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1067 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1068 assert(Edge->ContextIds.empty());
1069 removeEdgeFromGraph(Edge.get(), &EI,
true);
1075template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1076void CallsiteContextGraph<
1077 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *
Node) {
1078 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1080 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1081 assert(Edge->ContextIds.empty());
1082 Edge->Caller->eraseCalleeEdge(Edge.get());
1083 EI =
Node->CallerEdges.erase(EI);
1089template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1090typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1091CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1092 findEdgeFromCallee(
const ContextNode *Callee) {
1093 for (
const auto &Edge : CalleeEdges)
1094 if (Edge->Callee == Callee)
1099template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1100typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1101CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1102 findEdgeFromCaller(
const ContextNode *Caller) {
1103 for (
const auto &Edge : CallerEdges)
1104 if (Edge->Caller == Caller)
1109template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1110void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1111 eraseCalleeEdge(
const ContextEdge *Edge) {
1113 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1114 return CalleeEdge.get() == Edge;
1116 assert(EI != CalleeEdges.end());
1117 CalleeEdges.erase(EI);
1120template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1121void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1122 eraseCallerEdge(
const ContextEdge *Edge) {
1124 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1125 return CallerEdge.get() == Edge;
1127 assert(EI != CallerEdges.end());
1128 CallerEdges.erase(EI);
1131template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1132uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1135 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1137 for (
auto Id : ContextIds) {
1146template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1148CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1151 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1153 for (
auto Id : Node1Ids) {
1154 if (!Node2Ids.
count(Id))
1164template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1165uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1167 if (Node1Ids.
size() < Node2Ids.
size())
1168 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1170 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1173template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1174typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1175CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1177 assert(!getNodeForAlloc(Call));
1178 ContextNode *AllocNode = createNewNode(
true,
F, Call);
1179 AllocationCallToContextNodeMap[
Call] = AllocNode;
1181 AllocNode->OrigStackOrAllocId = LastContextId;
1184 AllocNode->AllocTypes = (
uint8_t)AllocationType::None;
1193 if (AllocTypes & (
uint8_t)AllocationType::NotCold)
1195 if (AllocTypes & (
uint8_t)AllocationType::Cold)
1200template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1201template <
class NodeT,
class IteratorT>
1202void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1211 ContextIdToAllocationType[++LastContextId] =
AllocType;
1213 if (!ContextSizeInfo.
empty()) {
1214 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1224 ContextNode *PrevNode = AllocNode;
1231 ContextIter != StackContext.
end(); ++ContextIter) {
1232 auto StackId = getStackId(*ContextIter);
1233 ContextNode *
StackNode = getNodeForStackId(StackId);
1236 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1237 StackNode->OrigStackOrAllocId = StackId;
1248template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1250CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1254 for (
auto OldId : StackSequenceContextIds) {
1255 NewContextIds.
insert(++LastContextId);
1256 OldToNewContextIds[OldId].insert(LastContextId);
1257 assert(ContextIdToAllocationType.count(OldId));
1259 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1261 return NewContextIds;
1264template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1265void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1266 propagateDuplicateContextIds(
1271 for (
auto Id : ContextIds)
1272 if (
auto NewId = OldToNewContextIds.find(Id);
1273 NewId != OldToNewContextIds.end())
1274 NewIds.
insert(NewId->second.begin(), NewId->second.end());
1279 auto UpdateCallers = [&](ContextNode *
Node,
1281 auto &&UpdateCallers) ->
void {
1282 for (
const auto &Edge :
Node->CallerEdges) {
1283 auto Inserted = Visited.insert(Edge.get());
1286 ContextNode *NextNode = Edge->Caller;
1290 if (!NewIdsToAdd.
empty()) {
1291 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1292 UpdateCallers(NextNode, Visited, UpdateCallers);
1298 for (
auto &Entry : AllocationCallToContextNodeMap) {
1300 UpdateCallers(
Node, Visited, UpdateCallers);
1304template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1305void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1306 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1311 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1313 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1319 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1320 NotFoundContextIds);
1321 RemainingContextIds.
swap(NotFoundContextIds);
1323 if (NewEdgeContextIds.
empty()) {
1327 if (TowardsCallee) {
1328 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1329 auto NewEdge = std::make_shared<ContextEdge>(
1330 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1331 NewNode->CalleeEdges.push_back(NewEdge);
1332 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1334 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1335 auto NewEdge = std::make_shared<ContextEdge>(
1336 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1337 NewNode->CallerEdges.push_back(NewEdge);
1338 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1341 if (Edge->getContextIds().empty()) {
1342 removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
1349template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1351 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1355 assert(!Edge->ContextIds.empty());
1358template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1360 bool CheckEdges =
true) {
1361 if (
Node->isRemoved())
1365 auto NodeContextIds =
Node->getContextIds();
1369 if (
Node->CallerEdges.size()) {
1371 Node->CallerEdges.front()->ContextIds);
1374 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1375 set_union(CallerEdgeContextIds, Edge->ContextIds);
1379 assert(NodeContextIds == CallerEdgeContextIds ||
1382 if (
Node->CalleeEdges.size()) {
1384 Node->CalleeEdges.front()->ContextIds);
1387 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1388 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1390 assert(NodeContextIds == CalleeEdgeContextIds);
1399 for (
const auto &E :
Node->CalleeEdges)
1405template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1406void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1407 assignStackNodesPostOrder(
1410 &StackIdToMatchingCalls,
1419 auto CallerEdges =
Node->CallerEdges;
1420 for (
auto &Edge : CallerEdges) {
1422 if (Edge->isRemoved()) {
1426 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1427 CallToMatchingCall);
1436 if (
Node->IsAllocation ||
1437 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1440 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1444 if (Calls.size() == 1) {
1445 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1446 if (Ids.size() == 1) {
1447 assert(SavedContextIds.empty());
1450 if (
Node->Recursive)
1452 Node->setCall(Call);
1453 NonAllocationCallToContextNodeMap[
Call] =
Node;
1463 ContextNode *LastNode = getNodeForStackId(LastId);
1468 ContextNode *LastNode =
Node;
1475 bool PrevIterCreatedNode =
false;
1476 bool CreatedNode =
false;
1477 for (
unsigned I = 0;
I < Calls.size();
1478 I++, PrevIterCreatedNode = CreatedNode) {
1479 CreatedNode =
false;
1480 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1483 if (SavedContextIds.empty()) {
1488 if (!CallToMatchingCall.
contains(Call))
1490 auto MatchingCall = CallToMatchingCall[
Call];
1491 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1495 assert(
I > 0 && !PrevIterCreatedNode);
1498 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1503 assert(LastId == Ids.back());
1512 ContextNode *PrevNode = LastNode;
1516 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1518 ContextNode *CurNode = getNodeForStackId(Id);
1522 assert(!CurNode->Recursive);
1524 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1536 if (SavedContextIds.empty()) {
1545 ContextNode *NewNode = createNewNode(
false, Func, Call);
1546 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1548 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1550 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1556 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1561 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1566 for (
auto Id : Ids) {
1567 ContextNode *CurNode = getNodeForStackId(Id);
1574 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1576 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1577 if (PrevEdge->getContextIds().empty())
1578 removeEdgeFromGraph(PrevEdge);
1583 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1584 ? (
uint8_t)AllocationType::None
1585 : CurNode->computeAllocType();
1589 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1590 for (
auto Id : Ids) {
1591 ContextNode *CurNode = getNodeForStackId(Id);
1594 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
1600template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1601void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1610 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1611 for (
auto &Call : CallsWithMetadata) {
1613 if (AllocationCallToContextNodeMap.count(Call))
1615 auto StackIdsWithContextNodes =
1616 getStackIdsWithContextNodesForCall(
Call.call());
1619 if (StackIdsWithContextNodes.empty())
1623 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1624 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1639 for (
auto &It : StackIdToMatchingCalls) {
1640 auto &Calls = It.getSecond();
1642 if (Calls.size() == 1) {
1643 auto &Ids = Calls[0].StackIds;
1644 if (Ids.size() == 1)
1658 for (
const auto &[
Idx, CallCtxInfo] :
enumerate(Calls))
1659 FuncToIndex.
insert({CallCtxInfo.Func,
Idx});
1661 Calls.begin(), Calls.end(),
1662 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1663 return A.StackIds.size() > B.StackIds.size() ||
1664 (A.StackIds.size() == B.StackIds.size() &&
1665 (A.StackIds < B.StackIds ||
1666 (A.StackIds == B.StackIds &&
1667 FuncToIndex[A.Func] < FuncToIndex[B.Func])));
1674 ContextNode *LastNode = getNodeForStackId(LastId);
1678 if (LastNode->Recursive)
1694 for (
unsigned I = 0;
I < Calls.size();
I++) {
1695 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1696 assert(SavedContextIds.empty());
1697 assert(LastId == Ids.back());
1702 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1703 MatchingIdsFuncSet.
clear();
1712 ContextNode *PrevNode = LastNode;
1713 ContextNode *CurNode = LastNode;
1718 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1720 CurNode = getNodeForStackId(Id);
1724 if (CurNode->Recursive) {
1729 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1747 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1750 if (StackSequenceContextIds.
empty()) {
1763 if (Ids.back() != getLastStackId(Call)) {
1764 for (
const auto &PE : LastNode->CallerEdges) {
1765 set_subtract(StackSequenceContextIds, PE->getContextIds());
1766 if (StackSequenceContextIds.
empty())
1770 if (StackSequenceContextIds.
empty())
1782 MatchingIdsFuncSet.
insert(Func);
1789 bool DuplicateContextIds =
false;
1790 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1791 auto &CallCtxInfo = Calls[J];
1792 auto &NextIds = CallCtxInfo.StackIds;
1795 auto *NextFunc = CallCtxInfo.Func;
1796 if (NextFunc != Func) {
1799 DuplicateContextIds =
true;
1802 auto &NextCall = CallCtxInfo.Call;
1803 CallToMatchingCall[NextCall] =
Call;
1814 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1815 StackSequenceContextIds.
size());
1818 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1819 : StackSequenceContextIds;
1820 assert(!SavedContextIds.empty());
1822 if (!DuplicateContextIds) {
1826 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1827 if (LastNodeContextIds.
empty())
1834 propagateDuplicateContextIds(OldToNewContextIds);
1845 for (
auto &Entry : AllocationCallToContextNodeMap)
1846 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
1847 CallToMatchingCall);
1854 Call->getMetadata(LLVMContext::MD_callsite));
1855 return CallsiteContext.
back();
1858uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1861 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1863 return Index.getStackIdAtIndex(CallsiteContext.
back());
1880std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1882 unsigned CloneNo)
const {
1883 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1884 cast<CallBase>(Call)->getCalledFunction()->getName())
1888std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1889 const IndexCall &Call,
1890 unsigned CloneNo)
const {
1891 auto VI = FSToVIMap.find(Func);
1892 assert(VI != FSToVIMap.end());
1893 if (isa<AllocInfo *>(
Call.getBase()))
1894 return (
VI->second.name() +
" -> alloc").str();
1896 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(
Call.getBase());
1897 return (
VI->second.name() +
" -> " +
1899 Callsite->Clones[CloneNo]))
1904std::vector<uint64_t>
1905ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1908 Call->getMetadata(LLVMContext::MD_callsite));
1909 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1913std::vector<uint64_t>
1914IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1917 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1923template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1924template <
class NodeT,
class IteratorT>
1925std::vector<uint64_t>
1926CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1928 std::vector<uint64_t> StackIds;
1929 for (
auto IdOrIndex : CallsiteContext) {
1930 auto StackId = getStackId(IdOrIndex);
1931 ContextNode *
Node = getNodeForStackId(StackId);
1934 StackIds.push_back(StackId);
1939ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1942 :
Mod(
M), OREGetter(OREGetter) {
1944 std::vector<CallInfo> CallsWithMetadata;
1945 for (
auto &BB :
F) {
1946 for (
auto &
I : BB) {
1947 if (!isa<CallBase>(
I))
1949 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1950 CallsWithMetadata.push_back(&
I);
1951 auto *AllocNode = addAllocNode(&
I, &
F);
1952 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
1956 for (
auto &MDOp : MemProfMD->operands()) {
1957 auto *MIBMD = cast<const MDNode>(MDOp);
1958 std::vector<ContextTotalSize> ContextSizeInfo;
1960 if (MIBMD->getNumOperands() > 2) {
1961 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
1962 MDNode *ContextSizePair =
1963 dyn_cast<MDNode>(MIBMD->getOperand(
I));
1965 uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
1968 uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
1971 ContextSizeInfo.push_back({FullStackId, TotalSize});
1977 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1978 AllocNode, StackContext, CallsiteContext,
1981 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
1984 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
1985 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
1988 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
1989 CallsWithMetadata.push_back(&
I);
1993 if (!CallsWithMetadata.empty())
1994 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
1998 dbgs() <<
"CCG before updating call stack chains:\n";
2003 exportToDot(
"prestackupdate");
2007 handleCallsitesWithMultipleTargets();
2010 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2011 for (
auto &Call : FuncEntry.second)
2012 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2015IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2020 for (
auto &
I : Index) {
2022 for (
auto &S :
VI.getSummaryList()) {
2031 !isPrevailing(
VI.getGUID(), S.get()))
2033 auto *
FS = dyn_cast<FunctionSummary>(S.get());
2036 std::vector<CallInfo> CallsWithMetadata;
2037 if (!
FS->allocs().empty()) {
2038 for (
auto &AN :
FS->mutableAllocs()) {
2043 if (AN.MIBs.empty())
2045 IndexCall AllocCall(&AN);
2046 CallsWithMetadata.push_back(AllocCall);
2047 auto *AllocNode = addAllocNode(AllocCall, FS);
2056 AN.ContextSizeInfos.size() == AN.MIBs.size());
2058 for (
auto &MIB : AN.MIBs) {
2061 std::vector<ContextTotalSize> ContextSizeInfo;
2062 if (!AN.ContextSizeInfos.empty()) {
2063 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2064 ContextSizeInfo.push_back({FullStackId, TotalSize});
2066 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2067 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2071 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
2076 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2080 if (!
FS->callsites().empty())
2081 for (
auto &SN :
FS->mutableCallsites()) {
2082 IndexCall StackNodeCall(&SN);
2083 CallsWithMetadata.push_back(StackNodeCall);
2086 if (!CallsWithMetadata.empty())
2087 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2089 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2095 dbgs() <<
"CCG before updating call stack chains:\n";
2100 exportToDot(
"prestackupdate");
2104 handleCallsitesWithMultipleTargets();
2107template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2108void CallsiteContextGraph<DerivedCCG, FuncTy,
2109 CallTy>::handleCallsitesWithMultipleTargets() {
2124 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2125 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2135 std::vector<CallInfo> AllCalls;
2136 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2137 AllCalls.push_back(
Node->Call);
2138 AllCalls.insert(AllCalls.end(),
Node->MatchingCalls.begin(),
2139 Node->MatchingCalls.end());
2152 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2155 auto It = AllCalls.begin();
2157 for (; It != AllCalls.end(); ++It) {
2160 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2163 if (!Edge->Callee->hasCall())
2165 assert(NodeToCallingFunc.count(Edge->Callee));
2167 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2176 if (
Node->Call != ThisCall) {
2177 Node->setCall(ThisCall);
2188 Node->MatchingCalls.clear();
2191 if (It == AllCalls.end()) {
2192 RemovedEdgesWithMismatchedCallees++;
2201 for (++It; It != AllCalls.end(); ++It) {
2205 Node->MatchingCalls.push_back(ThisCall);
2214 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2215 return !it.second->hasCall() || it.second->Call != it.first;
2219 for (
auto &[Call,
Node] : NewCallToNode)
2220 NonAllocationCallToContextNodeMap[
Call] =
Node;
2224 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
2225 NonAllocationCallToContextNodeMap[
Call] =
Node;
2228template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2229bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2231 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2235 struct CallsWithSameCallee {
2236 std::vector<CallInfo> Calls;
2237 ContextNode *
Node =
nullptr;
2243 for (
auto ThisCall : AllCalls) {
2244 auto *
F = getCalleeFunc(
ThisCall.call());
2246 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2255 for (
const auto &Edge :
Node->CalleeEdges) {
2256 if (!Edge->Callee->hasCall())
2258 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2259 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2260 CalleeNodeToCallInfo[Edge->Callee] =
2261 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2267 if (CalleeNodeToCallInfo.
empty())
2279 ContextNode *UnmatchedCalleesNode =
nullptr;
2281 bool UsedOrigNode =
false;
2283 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
2285 if (!Edge->Callee->hasCall()) {
2292 ContextNode *CallerNodeToUse =
nullptr;
2296 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2297 if (!UnmatchedCalleesNode)
2298 UnmatchedCalleesNode =
2299 createNewNode(
false, NodeToCallingFunc[
Node]);
2300 CallerNodeToUse = UnmatchedCalleesNode;
2304 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2307 if (!UsedOrigNode) {
2310 Node->MatchingCalls.clear();
2311 UsedOrigNode =
true;
2314 createNewNode(
false, NodeToCallingFunc[
Node]);
2318 Info->Node->setCall(
Info->Calls.front());
2319 Info->Node->MatchingCalls.insert(
Info->Node->MatchingCalls.end(),
2320 Info->Calls.begin() + 1,
2325 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2327 CallerNodeToUse =
Info->Node;
2331 if (CallerNodeToUse ==
Node) {
2336 moveCalleeEdgeToNewCaller(EI, CallerNodeToUse);
2343 for (
auto &
I : CalleeNodeToCallInfo)
2344 removeNoneTypeCallerEdges(
I.second->Node);
2345 if (UnmatchedCalleesNode)
2346 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2347 removeNoneTypeCallerEdges(
Node);
2360 return Index.getStackIdAtIndex(IdOrIndex);
2363template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2364bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2365 CallTy Call, EdgeIter &EI,
2368 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2369 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
2372 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2373 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
2378 if (FoundCalleeChain.empty())
2381 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
2382 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2386 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
2387 Edge->ContextIds.end());
2388 CurEdge->AllocTypes |= Edge->AllocTypes;
2393 auto NewEdge = std::make_shared<ContextEdge>(
2394 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2395 Callee->CallerEdges.push_back(NewEdge);
2396 if (Caller == Edge->Caller) {
2400 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2403 "Iterator position not restored after insert and increment");
2405 Caller->CalleeEdges.push_back(NewEdge);
2410 auto *CurCalleeNode = Edge->Callee;
2411 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2412 ContextNode *NewNode =
nullptr;
2414 if (TailCallToContextNodeMap.
count(NewCall)) {
2415 NewNode = TailCallToContextNodeMap[NewCall];
2416 NewNode->AllocTypes |= Edge->AllocTypes;
2418 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2420 NewNode = createNewNode(
false, Func, NewCall);
2421 TailCallToContextNodeMap[NewCall] = NewNode;
2422 NewNode->AllocTypes = Edge->AllocTypes;
2426 AddEdge(NewNode, CurCalleeNode);
2428 CurCalleeNode = NewNode;
2432 AddEdge(Edge->Caller, CurCalleeNode);
2436 auto *
Caller = Edge->Caller;
2440 removeEdgeFromGraph(Edge.get(), &EI,
true);
2452bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2454 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2455 bool &FoundMultipleCalleeChains) {
2462 FoundCalleeChain.push_back({Callsite,
F});
2465 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
2467 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2469 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2477 bool FoundSingleCalleeChain =
false;
2478 for (
auto &BB : *CalleeFunc) {
2479 for (
auto &
I : BB) {
2480 auto *CB = dyn_cast<CallBase>(&
I);
2481 if (!CB || !CB->isTailCall())
2483 auto *CalledValue = CB->getCalledOperand();
2484 auto *CalledFunction = CB->getCalledFunction();
2485 if (CalledValue && !CalledFunction) {
2486 CalledValue = CalledValue->stripPointerCasts();
2488 CalledFunction = dyn_cast<Function>(CalledValue);
2492 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2493 assert(!CalledFunction &&
2494 "Expected null called function in callsite for alias");
2495 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2497 if (!CalledFunction)
2499 if (CalledFunction == ProfiledCallee) {
2500 if (FoundSingleCalleeChain) {
2501 FoundMultipleCalleeChains =
true;
2504 FoundSingleCalleeChain =
true;
2505 FoundProfiledCalleeCount++;
2506 FoundProfiledCalleeDepth +=
Depth;
2507 if (
Depth > FoundProfiledCalleeMaxDepth)
2508 FoundProfiledCalleeMaxDepth =
Depth;
2509 SaveCallsiteInfo(&
I, CalleeFunc);
2510 }
else if (findProfiledCalleeThroughTailCalls(
2511 ProfiledCallee, CalledFunction,
Depth + 1,
2512 FoundCalleeChain, FoundMultipleCalleeChains)) {
2515 assert(!FoundMultipleCalleeChains);
2516 if (FoundSingleCalleeChain) {
2517 FoundMultipleCalleeChains =
true;
2520 FoundSingleCalleeChain =
true;
2521 SaveCallsiteInfo(&
I, CalleeFunc);
2522 }
else if (FoundMultipleCalleeChains)
2527 return FoundSingleCalleeChain;
2531 auto *CB = dyn_cast<CallBase>(Call);
2532 if (!CB->getCalledOperand() || CB->isIndirectCall())
2534 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2535 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2537 return dyn_cast<Function>(Alias->getAliasee());
2538 return dyn_cast<Function>(CalleeVal);
2541bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2543 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2544 auto *CB = dyn_cast<CallBase>(Call);
2545 if (!CB->getCalledOperand() || CB->isIndirectCall())
2547 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2548 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2549 if (CalleeFunc == Func)
2551 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2552 if (Alias && Alias->getAliasee() == Func)
2563 bool FoundMultipleCalleeChains =
false;
2564 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2566 FoundMultipleCalleeChains)) {
2567 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2568 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2569 <<
" that actually called " << CalleeVal->getName()
2570 << (FoundMultipleCalleeChains
2571 ?
" (found multiple possible chains)"
2574 if (FoundMultipleCalleeChains)
2575 FoundProfiledCalleeNonUniquelyCount++;
2582bool ModuleCallsiteContextGraph::sameCallee(
Instruction *Call1,
2584 auto *CB1 = cast<CallBase>(Call1);
2585 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2587 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2588 auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
2589 auto *CB2 = cast<CallBase>(Call2);
2590 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2592 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2593 auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
2594 return CalleeFunc1 == CalleeFunc2;
2597bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2599 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2600 bool &FoundMultipleCalleeChains) {
2609 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2610 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2613 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2616 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2617 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2624 bool FoundSingleCalleeChain =
false;
2627 !isPrevailing(CurCallee.
getGUID(), S.get()))
2629 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2632 auto FSVI = CurCallee;
2633 auto *AS = dyn_cast<AliasSummary>(S.get());
2635 FSVI = AS->getAliaseeVI();
2636 for (
auto &CallEdge :
FS->calls()) {
2637 if (!CallEdge.second.hasTailCall())
2639 if (CallEdge.first == ProfiledCallee) {
2640 if (FoundSingleCalleeChain) {
2641 FoundMultipleCalleeChains =
true;
2644 FoundSingleCalleeChain =
true;
2645 FoundProfiledCalleeCount++;
2646 FoundProfiledCalleeDepth +=
Depth;
2647 if (
Depth > FoundProfiledCalleeMaxDepth)
2648 FoundProfiledCalleeMaxDepth =
Depth;
2649 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2651 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2652 FSToVIMap[
FS] = FSVI;
2653 }
else if (findProfiledCalleeThroughTailCalls(
2654 ProfiledCallee, CallEdge.first,
Depth + 1,
2655 FoundCalleeChain, FoundMultipleCalleeChains)) {
2658 assert(!FoundMultipleCalleeChains);
2659 if (FoundSingleCalleeChain) {
2660 FoundMultipleCalleeChains =
true;
2663 FoundSingleCalleeChain =
true;
2664 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2666 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2667 FSToVIMap[
FS] = FSVI;
2668 }
else if (FoundMultipleCalleeChains)
2673 return FoundSingleCalleeChain;
2677IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
2679 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
2680 if (
Callee.getSummaryList().empty())
2682 return dyn_cast<FunctionSummary>(
Callee.getSummaryList()[0]->getBaseObject());
2685bool IndexCallsiteContextGraph::calleeMatchesFunc(
2688 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2690 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
2694 Callee.getSummaryList().empty()
2696 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2697 assert(FSToVIMap.count(Func));
2698 auto FuncVI = FSToVIMap[
Func];
2699 if (Callee == FuncVI ||
2714 bool FoundMultipleCalleeChains =
false;
2715 if (!findProfiledCalleeThroughTailCalls(
2716 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2717 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2718 <<
" from " << FSToVIMap[CallerFunc]
2719 <<
" that actually called " << Callee
2720 << (FoundMultipleCalleeChains
2721 ?
" (found multiple possible chains)"
2724 if (FoundMultipleCalleeChains)
2725 FoundProfiledCalleeNonUniquelyCount++;
2732bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2734 dyn_cast_if_present<CallsiteInfo *>(Call1.getBase())->Callee;
2736 dyn_cast_if_present<CallsiteInfo *>(Call2.getBase())->Callee;
2737 return Callee1 == Callee2;
2740template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2741void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2747template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2748void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2750 OS <<
"Node " <<
this <<
"\n";
2754 OS <<
" (recursive)";
2756 if (!MatchingCalls.
empty()) {
2757 OS <<
"\tMatchingCalls:\n";
2758 for (
auto &MatchingCall : MatchingCalls) {
2760 MatchingCall.print(
OS);
2765 OS <<
"\tContextIds:";
2767 auto ContextIds = getContextIds();
2768 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2769 std::sort(SortedIds.begin(), SortedIds.end());
2770 for (
auto Id : SortedIds)
2773 OS <<
"\tCalleeEdges:\n";
2774 for (
auto &Edge : CalleeEdges)
2775 OS <<
"\t\t" << *Edge <<
"\n";
2776 OS <<
"\tCallerEdges:\n";
2777 for (
auto &Edge : CallerEdges)
2778 OS <<
"\t\t" << *Edge <<
"\n";
2779 if (!Clones.empty()) {
2782 for (
auto *Clone : Clones)
2785 }
else if (CloneOf) {
2786 OS <<
"\tClone of " << CloneOf <<
"\n";
2790template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2791void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
2797template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2798void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2802 OS <<
" ContextIds:";
2803 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2804 std::sort(SortedIds.begin(), SortedIds.end());
2805 for (
auto Id : SortedIds)
2809template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2810void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
2814template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2815void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2817 OS <<
"Callsite Context Graph:\n";
2818 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2819 for (
const auto Node : nodes<GraphType>(
this)) {
2820 if (
Node->isRemoved())
2827template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2828void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2830 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2831 for (
const auto Node : nodes<GraphType>(
this)) {
2832 if (
Node->isRemoved())
2834 if (!
Node->IsAllocation)
2837 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
2838 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2839 std::sort(SortedIds.begin(), SortedIds.end());
2840 for (
auto Id : SortedIds) {
2841 auto TypeI = ContextIdToAllocationType.find(Id);
2842 assert(TypeI != ContextIdToAllocationType.end());
2843 auto CSI = ContextIdToContextSizeInfos.find(Id);
2844 if (CSI != ContextIdToContextSizeInfos.end()) {
2845 for (
auto &Info : CSI->second) {
2846 OS <<
"MemProf hinting: "
2848 <<
" full allocation context " <<
Info.FullStackId
2849 <<
" with total size " <<
Info.TotalSize <<
" is "
2851 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
2853 <<
" due to cold byte percent";
2861template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2862void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2863 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2864 for (
const auto Node : nodes<GraphType>(
this)) {
2865 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2866 for (
auto &Edge :
Node->CallerEdges)
2867 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
2871template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2873 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2874 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2876 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
2892 return G->NodeOwner.begin()->get();
2895 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2896 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2904 decltype(&GetCallee)>;
2915template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2920 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2926 std::string LabelString =
2927 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2928 Twine(Node->OrigStackOrAllocId))
2930 LabelString +=
"\n";
2931 if (Node->hasCall()) {
2932 auto Func =
G->NodeToCallingFunc.find(Node);
2933 assert(Func !=
G->NodeToCallingFunc.end());
2935 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2937 LabelString +=
"null call";
2938 if (Node->Recursive)
2939 LabelString +=
" (recursive)";
2941 LabelString +=
" (external)";
2947 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(Node) +
" " +
2948 getContextIds(Node->getContextIds()) +
"\"")
2951 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2952 AttributeString +=
",style=\"filled\"";
2953 if (Node->CloneOf) {
2954 AttributeString +=
",color=\"blue\"";
2955 AttributeString +=
",style=\"filled,bold,dashed\"";
2957 AttributeString +=
",style=\"filled\"";
2958 return AttributeString;
2963 auto &Edge = *(ChildIter.getCurrent());
2964 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2965 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
2972 return Node->isRemoved();
2977 std::string IdString =
"ContextIds:";
2978 if (ContextIds.
size() < 100) {
2979 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2980 std::sort(SortedIds.begin(), SortedIds.end());
2981 for (
auto Id : SortedIds)
2982 IdString += (
" " +
Twine(Id)).str();
2984 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
2989 static std::string getColor(
uint8_t AllocTypes) {
2998 return "mediumorchid1";
3002 static std::string getNodeId(NodeRef
Node) {
3003 std::stringstream SStream;
3004 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
3005 std::string
Result = SStream.str();
3010template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3011void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3012 std::string Label)
const {
3017template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3018typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3019CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3020 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
3022 ContextNode *
Node = Edge->Callee;
3024 ContextNode *Clone =
3025 createNewNode(
Node->IsAllocation, NodeToCallingFunc[
Node],
Node->Call);
3026 Node->addClone(Clone);
3027 Clone->MatchingCalls =
Node->MatchingCalls;
3028 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true,
3033template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3034void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3035 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
3036 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
3041 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3043 ContextNode *OldCallee = Edge->Callee;
3047 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
3051 if (ContextIdsToMove.
empty())
3052 ContextIdsToMove = Edge->getContextIds();
3056 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3059 NewCallee->AllocTypes |= Edge->AllocTypes;
3061 if (ExistingEdgeToNewCallee) {
3064 ExistingEdgeToNewCallee->getContextIds().
insert(ContextIdsToMove.
begin(),
3065 ContextIdsToMove.
end());
3066 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3067 assert(Edge->ContextIds == ContextIdsToMove);
3068 removeEdgeFromGraph(Edge.get(), CallerEdgeI,
false);
3071 Edge->Callee = NewCallee;
3072 NewCallee->CallerEdges.push_back(Edge);
3075 *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
3077 OldCallee->eraseCallerEdge(Edge.get());
3086 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3087 if (ExistingEdgeToNewCallee) {
3090 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
3091 ContextIdsToMove.
end());
3092 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3095 auto NewEdge = std::make_shared<ContextEdge>(
3096 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3097 Edge->Caller->CalleeEdges.push_back(NewEdge);
3098 NewCallee->CallerEdges.push_back(NewEdge);
3102 NewCallee->AllocTypes |= CallerEdgeAllocType;
3104 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
3109 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3114 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3115 OldCalleeEdge->AllocTypes =
3116 computeAllocType(OldCalleeEdge->getContextIds());
3123 if (
auto *NewCalleeEdge =
3124 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
3125 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3126 EdgeContextIdsToMove.
end());
3127 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3131 auto NewEdge = std::make_shared<ContextEdge>(
3132 OldCalleeEdge->Callee, NewCallee,
3133 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3134 NewCallee->CalleeEdges.push_back(NewEdge);
3135 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3139 OldCallee->AllocTypes = OldCallee->computeAllocType();
3142 OldCallee->emptyContextIds());
3144 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
3145 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
3146 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3147 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3149 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3150 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
3155template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3156void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3157 moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller) {
3158 auto Edge = *CalleeEdgeI;
3160 ContextNode *OldCaller = Edge->Caller;
3164 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
3166 CalleeEdgeI = OldCaller->CalleeEdges.erase(CalleeEdgeI);
3167 if (ExistingEdgeToNewCaller) {
3170 ExistingEdgeToNewCaller->getContextIds().insert(
3171 Edge->getContextIds().begin(), Edge->getContextIds().end());
3172 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3173 Edge->ContextIds.clear();
3175 Edge->Callee->eraseCallerEdge(Edge.get());
3178 Edge->Caller = NewCaller;
3179 NewCaller->CalleeEdges.push_back(Edge);
3184 NewCaller->AllocTypes |= Edge->AllocTypes;
3191 bool IsNewNode = NewCaller->CallerEdges.empty();
3193 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3198 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3199 OldCallerEdge->AllocTypes =
3200 computeAllocType(OldCallerEdge->getContextIds());
3205 auto *ExistingCallerEdge =
3206 NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3207 assert(IsNewNode || ExistingCallerEdge);
3208 if (ExistingCallerEdge) {
3209 ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3210 EdgeContextIdsToMove.
end());
3211 ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3214 auto NewEdge = std::make_shared<ContextEdge>(
3215 NewCaller, OldCallerEdge->Caller,
3216 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3217 NewCaller->CallerEdges.push_back(NewEdge);
3218 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3222 OldCaller->AllocTypes = OldCaller->computeAllocType();
3225 OldCaller->emptyContextIds());
3227 checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller,
false);
3228 checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller,
false);
3229 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3230 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3232 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3233 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
3238template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3239void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3240 recursivelyRemoveNoneTypeCalleeEdges(
3246 removeNoneTypeCalleeEdges(
Node);
3248 for (
auto *Clone :
Node->Clones)
3249 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3253 auto CallerEdges =
Node->CallerEdges;
3254 for (
auto &Edge : CallerEdges) {
3256 if (Edge->isRemoved()) {
3260 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
3264template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3265void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3267 for (
auto &Entry : AllocationCallToContextNodeMap) {
3269 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3272 for (
auto &Entry : AllocationCallToContextNodeMap)
3273 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3286template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3287void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3291 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
3299 if (!
Node->hasCall())
3314 auto CallerEdges =
Node->CallerEdges;
3315 for (
auto &Edge : CallerEdges) {
3317 if (Edge->isRemoved()) {
3322 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
3323 identifyClones(Edge->Caller, Visited, AllocContextIds);
3346 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3349 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
3350 [&](
const std::shared_ptr<ContextEdge> &
A,
3351 const std::shared_ptr<ContextEdge> &
B) {
3354 if (A->ContextIds.empty())
3360 if (B->ContextIds.empty())
3363 if (A->AllocTypes == B->AllocTypes)
3366 return *A->ContextIds.begin() < *B->ContextIds.begin();
3367 return AllocTypeCloningPriority[A->AllocTypes] <
3368 AllocTypeCloningPriority[B->AllocTypes];
3378 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
3379 auto CallerEdge = *EI;
3388 if (!CallerEdge->Caller->hasCall()) {
3395 auto CallerEdgeContextsForAlloc =
3397 if (CallerEdgeContextsForAlloc.empty()) {
3401 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3405 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3406 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3407 for (
auto &CalleeEdge :
Node->CalleeEdges)
3408 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3409 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3424 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
3425 allocTypeToUse(
Node->AllocTypes) &&
3426 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3427 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
3434 ContextNode *Clone =
nullptr;
3435 for (
auto *CurClone :
Node->Clones) {
3436 if (allocTypeToUse(CurClone->AllocTypes) !=
3437 allocTypeToUse(CallerAllocTypeForAlloc))
3444 assert(!BothSingleAlloc ||
3445 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3451 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3452 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3460 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI,
false,
3461 CallerEdgeContextsForAlloc);
3464 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
3479 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
3482void ModuleCallsiteContextGraph::updateAllocationCall(
3486 "memprof", AllocTypeString);
3487 cast<CallBase>(
Call.call())->addFnAttr(
A);
3488 OREGetter(
Call.call()->getFunction())
3490 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3492 <<
" marked with memprof allocation attribute "
3493 <<
ore::NV(
"Attribute", AllocTypeString));
3496void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
3500 assert(AI->Versions.size() >
Call.cloneNo());
3505ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3506 const auto *CB = cast<CallBase>(
Call.call());
3507 if (!CB->getAttributes().hasFnAttr(
"memprof"))
3509 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3515IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3517 assert(AI->Versions.size() >
Call.cloneNo());
3521void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3522 FuncInfo CalleeFunc) {
3523 if (CalleeFunc.cloneNo() > 0)
3524 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
3525 OREGetter(CallerCall.call()->getFunction())
3527 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
3528 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
3529 <<
" assigned to call function clone "
3530 <<
ore::NV(
"Callee", CalleeFunc.func()));
3533void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3534 FuncInfo CalleeFunc) {
3535 auto *CI = CallerCall.call().dyn_cast<
CallsiteInfo *>();
3537 "Caller cannot be an allocation which should not have profiled calls");
3538 assert(CI->Clones.size() > CallerCall.cloneNo());
3539 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
3542CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
3544ModuleCallsiteContextGraph::cloneFunctionForCallsite(
3545 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3546 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3552 NewFunc->setName(
Name);
3553 for (
auto &Inst : CallsWithMetadataInFunc) {
3555 assert(Inst.cloneNo() == 0);
3556 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
3558 OREGetter(
Func.func())
3560 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
3561 return {NewFunc, CloneNo};
3565 IndexCall>::FuncInfo
3566IndexCallsiteContextGraph::cloneFunctionForCallsite(
3567 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3568 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3583 for (
auto &Inst : CallsWithMetadataInFunc) {
3585 assert(Inst.cloneNo() == 0);
3586 if (
auto *AI = Inst.call().dyn_cast<
AllocInfo *>()) {
3587 assert(AI->Versions.size() == CloneNo);
3590 AI->Versions.push_back(0);
3593 assert(CI && CI->Clones.size() == CloneNo);
3596 CI->Clones.push_back(0);
3598 CallMap[Inst] = {Inst.call(), CloneNo};
3600 return {
Func.func(), CloneNo};
3634template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3635bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3636 bool Changed =
false;
3644 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
3645 const FuncInfo &CalleeFunc) {
3647 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
3652 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
3653 FuncInfo OrigFunc(Func);
3657 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
3658 for (
auto &Call : CallsWithMetadata) {
3659 ContextNode *
Node = getNodeForInst(Call);
3666 "Not having a call should have prevented cloning");
3670 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
3674 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
3676 ContextNode *CallsiteClone,
3679 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3681 assert(FuncClonesToCallMap.count(FuncClone));
3682 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3684 if (CallMap.count(Call))
3685 CallClone = CallMap[
Call];
3686 CallsiteClone->setCall(CallClone);
3688 for (
auto &MatchingCall :
Node->MatchingCalls) {
3690 if (CallMap.count(MatchingCall))
3691 CallClone = CallMap[MatchingCall];
3693 MatchingCall = CallClone;
3700 std::deque<ContextNode *> ClonesWorklist;
3702 if (!
Node->emptyContextIds())
3703 ClonesWorklist.push_back(
Node);
3704 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
3705 Node->Clones.end());
3710 unsigned NodeCloneCount = 0;
3711 while (!ClonesWorklist.empty()) {
3712 ContextNode *Clone = ClonesWorklist.front();
3713 ClonesWorklist.pop_front();
3716 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3722 if (FuncClonesToCallMap.size() < NodeCloneCount) {
3724 if (NodeCloneCount == 1) {
3729 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3730 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3734 FuncClonesToCallMap[OrigFunc] = {};
3735 AssignCallsiteCloneToFuncClone(
3736 OrigFunc, Call, Clone,
3737 AllocationCallToContextNodeMap.count(Call));
3738 for (
auto &CE : Clone->CallerEdges) {
3740 if (!
CE->Caller->hasCall())
3742 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
3752 FuncInfo PreviousAssignedFuncClone;
3754 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3755 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3757 bool CallerAssignedToCloneOfFunc =
false;
3758 if (EI != Clone->CallerEdges.end()) {
3759 const std::shared_ptr<ContextEdge> &Edge = *EI;
3760 PreviousAssignedFuncClone =
3761 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3762 CallerAssignedToCloneOfFunc =
true;
3767 std::map<CallInfo, CallInfo> NewCallMap;
3768 unsigned CloneNo = FuncClonesToCallMap.size();
3769 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3770 "should already exist in the map");
3771 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3772 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3773 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3774 FunctionClonesAnalysis++;
3780 if (!CallerAssignedToCloneOfFunc) {
3781 AssignCallsiteCloneToFuncClone(
3782 NewFuncClone, Call, Clone,
3783 AllocationCallToContextNodeMap.count(Call));
3784 for (
auto &CE : Clone->CallerEdges) {
3786 if (!
CE->Caller->hasCall())
3788 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3800 auto CallerEdges = Clone->CallerEdges;
3801 for (
auto CE : CallerEdges) {
3803 if (
CE->isRemoved()) {
3809 if (!
CE->Caller->hasCall())
3812 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3816 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3817 PreviousAssignedFuncClone)
3820 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3833 auto CalleeEdges =
CE->Caller->CalleeEdges;
3834 for (
auto CalleeEdge : CalleeEdges) {
3837 if (CalleeEdge->isRemoved()) {
3842 ContextNode *
Callee = CalleeEdge->Callee;
3846 if (Callee == Clone || !
Callee->hasCall())
3848 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3849 removeNoneTypeCalleeEdges(NewClone);
3852 removeNoneTypeCalleeEdges(Callee);
3857 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3858 RecordCalleeFuncOfCallsite(
3859 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
3868 OrigCall.setCloneNo(0);
3869 std::map<CallInfo, CallInfo> &CallMap =
3870 FuncClonesToCallMap[NewFuncClone];
3871 assert(CallMap.count(OrigCall));
3872 CallInfo NewCall(CallMap[OrigCall]);
3874 NewClone->setCall(NewCall);
3876 for (
auto &MatchingCall : NewClone->MatchingCalls) {
3877 CallInfo OrigMatchingCall(MatchingCall);
3878 OrigMatchingCall.setCloneNo(0);
3879 assert(CallMap.count(OrigMatchingCall));
3880 CallInfo NewCall(CallMap[OrigMatchingCall]);
3883 MatchingCall = NewCall;
3906 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3907 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3910 for (
auto EI = Clone->CallerEdges.begin();
3911 EI != Clone->CallerEdges.end();) {
3914 if (!Edge->Caller->hasCall()) {
3920 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3921 FuncInfo FuncCloneCalledByCaller =
3922 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3932 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3933 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3941 (FuncCloneAssignedToCurCallsiteClone &&
3942 FuncCloneAssignedToCurCallsiteClone !=
3943 FuncCloneCalledByCaller)) {
3958 if (FuncCloneToNewCallsiteCloneMap.count(
3959 FuncCloneCalledByCaller)) {
3960 ContextNode *NewClone =
3961 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
3962 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
3964 removeNoneTypeCalleeEdges(NewClone);
3967 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
3968 removeNoneTypeCalleeEdges(NewClone);
3969 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
3972 ClonesWorklist.push_back(NewClone);
3973 assert(EI == Clone->CallerEdges.end() ||
3979 removeNoneTypeCalleeEdges(Clone);
3988 if (!FuncCloneAssignedToCurCallsiteClone) {
3989 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
3991 AssignCallsiteCloneToFuncClone(
3992 FuncCloneCalledByCaller, Call, Clone,
3993 AllocationCallToContextNodeMap.count(Call));
3997 assert(FuncCloneAssignedToCurCallsiteClone ==
3998 FuncCloneCalledByCaller);
4007 if (!FuncCloneAssignedToCurCallsiteClone) {
4012 for (
auto &CF : FuncClonesToCallMap) {
4013 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
4014 FuncCloneAssignedToCurCallsiteClone = CF.first;
4018 assert(FuncCloneAssignedToCurCallsiteClone);
4020 AssignCallsiteCloneToFuncClone(
4021 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4022 AllocationCallToContextNodeMap.count(Call));
4024 assert(FuncCloneToCurNodeCloneMap
4025 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4027 RecordCalleeFuncOfCallsite(Edge->Caller,
4028 FuncCloneAssignedToCurCallsiteClone);
4035 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
4036 for (
const auto &PE :
Node->CalleeEdges)
4037 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4038 for (
const auto &CE :
Node->CallerEdges)
4039 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4040 for (
auto *Clone :
Node->Clones) {
4041 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4042 for (
const auto &PE : Clone->CalleeEdges)
4043 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4044 for (
const auto &CE : Clone->CallerEdges)
4045 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4054 auto UpdateCalls = [&](ContextNode *
Node,
4056 auto &&UpdateCalls) {
4061 for (
auto *Clone :
Node->Clones)
4062 UpdateCalls(Clone, Visited, UpdateCalls);
4064 for (
auto &Edge :
Node->CallerEdges)
4065 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
4069 if (!
Node->hasCall() ||
Node->emptyContextIds())
4072 if (
Node->IsAllocation) {
4073 auto AT = allocTypeToUse(
Node->AllocTypes);
4079 !ContextIdToContextSizeInfos.empty()) {
4082 for (
auto Id :
Node->getContextIds()) {
4083 auto TypeI = ContextIdToAllocationType.find(Id);
4084 assert(TypeI != ContextIdToAllocationType.end());
4085 auto CSI = ContextIdToContextSizeInfos.find(Id);
4086 if (CSI != ContextIdToContextSizeInfos.end()) {
4087 for (
auto &Info : CSI->second) {
4090 TotalCold +=
Info.TotalSize;
4097 updateAllocationCall(
Node->Call, AT);
4102 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
4105 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
4106 updateCall(
Node->Call, CalleeFunc);
4108 for (
auto &Call :
Node->MatchingCalls)
4109 updateCall(Call, CalleeFunc);
4118 for (
auto &Entry : AllocationCallToContextNodeMap)
4119 UpdateCalls(
Entry.second, Visited, UpdateCalls);
4133 FunctionsClonedThinBackend++;
4134 for (
unsigned I = 1;
I < NumClones;
I++) {
4135 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
4137 FunctionClonesThinBackend++;
4140 for (
auto &BB : *NewF) {
4141 for (
auto &Inst : BB) {
4142 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
4143 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
4147 auto *PrevF = M.getFunction(
Name);
4151 assert(PrevF->isDeclaration());
4152 NewF->takeName(PrevF);
4153 PrevF->replaceAllUsesWith(NewF);
4154 PrevF->eraseFromParent();
4156 NewF->setName(
Name);
4158 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
4161 if (!FuncToAliasMap.count(&
F))
4163 for (
auto *
A : FuncToAliasMap[&
F]) {
4165 auto *PrevA = M.getNamedAlias(
Name);
4167 A->getType()->getPointerAddressSpace(),
4168 A->getLinkage(),
Name, NewF);
4169 NewA->copyAttributesFrom(
A);
4173 assert(PrevA->isDeclaration());
4174 NewA->takeName(PrevA);
4175 PrevA->replaceAllUsesWith(NewA);
4176 PrevA->eraseFromParent();
4218bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
4220 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
4221 Symtab = std::make_unique<InstrProfSymtab>();
4232 if (
Error E = Symtab->create(M,
true,
false)) {
4233 std::string SymtabFailure =
toString(std::move(
E));
4234 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
4240bool MemProfContextDisambiguation::applyImport(
Module &M) {
4242 bool Changed =
false;
4247 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
4249 for (
auto &
A :
M.aliases()) {
4250 auto *Aliasee =
A.getAliaseeObject();
4251 if (
auto *
F = dyn_cast<Function>(Aliasee))
4252 FuncToAliasMap[
F].insert(&
A);
4255 if (!initializeIndirectCallPromotionInfo(M))
4265 bool ClonesCreated =
false;
4266 unsigned NumClonesCreated = 0;
4267 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
4277 if (ClonesCreated) {
4278 assert(NumClonesCreated == NumClones);
4285 ClonesCreated =
true;
4286 NumClonesCreated = NumClones;
4292 CloneFuncIfNeeded(
StackNode.Clones.size());
4299 auto CalleeOrigName = CalledFunction->getName();
4300 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
4305 auto NewF =
M.getOrInsertFunction(
4307 CalledFunction->getFunctionType());
4313 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4316 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4318 <<
" assigned to call function clone "
4319 <<
ore::NV(
"Callee", NewF.getCallee()));
4337 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
4339 "enable-import-metadata is needed to emit thinlto_src_module");
4341 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
4343 if (GVS->modulePath() == SrcModule) {
4344 GVSummary = GVS.get();
4348 assert(GVSummary && GVSummary->modulePath() == SrcModule);
4353 if (isa<AliasSummary>(GVSummary))
4356 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
4358 if (
FS->allocs().empty() &&
FS->callsites().empty())
4361 auto SI =
FS->callsites().begin();
4362 auto AI =
FS->allocs().begin();
4370 for (
auto CallsiteIt =
FS->callsites().rbegin();
4371 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
4372 auto &Callsite = *CallsiteIt;
4376 if (!Callsite.StackIdIndices.empty())
4378 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
4387 for (
auto &BB :
F) {
4388 for (
auto &
I : BB) {
4389 auto *CB = dyn_cast<CallBase>(&
I);
4394 auto *CalledValue = CB->getCalledOperand();
4395 auto *CalledFunction = CB->getCalledFunction();
4396 if (CalledValue && !CalledFunction) {
4397 CalledValue = CalledValue->stripPointerCasts();
4399 CalledFunction = dyn_cast<Function>(CalledValue);
4403 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
4404 assert(!CalledFunction &&
4405 "Expected null called function in callsite for alias");
4406 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
4410 I.getMetadata(LLVMContext::MD_callsite));
4411 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
4415 if (CB->getAttributes().hasFnAttr(
"memprof")) {
4417 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4418 ? AllocTypeColdThinBackend++
4419 : AllocTypeNotColdThinBackend++;
4420 OrigAllocsThinBackend++;
4421 AllocVersionsThinBackend++;
4422 if (!MaxAllocVersionsThinBackend)
4423 MaxAllocVersionsThinBackend = 1;
4430 auto &AllocNode = *(AI++);
4434 auto MIBIter = AllocNode.MIBs.begin();
4435 for (
auto &MDOp : MemProfMD->operands()) {
4436 assert(MIBIter != AllocNode.MIBs.end());
4438 MIBIter->StackIdIndices.begin();
4439 auto *MIBMD = cast<const MDNode>(MDOp);
4443 auto ContextIterBegin =
4447 (ContextIterBegin != StackContext.
end() &&
4448 *ContextIterBegin == 0)
4451 for (
auto ContextIter = ContextIterBegin;
4452 ContextIter != StackContext.
end(); ++ContextIter) {
4456 if (LastStackContextId == *ContextIter)
4458 LastStackContextId = *ContextIter;
4459 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
4468 CloneFuncIfNeeded(AllocNode.Versions.size());
4470 OrigAllocsThinBackend++;
4471 AllocVersionsThinBackend += AllocNode.Versions.size();
4472 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
4473 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
4483 if (AllocNode.Versions.size() == 1 &&
4489 UnclonableAllocsThinBackend++;
4495 return Type == ((uint8_t)AllocationType::NotCold |
4496 (uint8_t)AllocationType::Cold);
4500 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
4506 : AllocTypeNotColdThinBackend++;
4518 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4521 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
4523 <<
" marked with memprof allocation attribute "
4524 <<
ore::NV(
"Attribute", AllocTypeString));
4526 }
else if (!CallsiteContext.empty()) {
4527 if (!CalledFunction) {
4530 auto *CI = dyn_cast<CallInst>(CB);
4531 assert(!CI || !CI->isInlineAsm());
4534 assert(CalledValue && !isa<Constant>(CalledValue));
4541 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
4547 CloneFuncIfNeeded(NumClones);
4552 assert(SI !=
FS->callsites().end());
4558 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
4559 for (
auto StackId : CallsiteContext) {
4567 CloneCallsite(
StackNode, CB, CalledFunction);
4569 }
else if (CB->isTailCall() && CalledFunction) {
4574 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
4575 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
4576 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
4577 CloneCallsite(Callsite->second, CB, CalledFunction);
4584 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
4594 for (
auto &BB :
F) {
4595 for (
auto &
I : BB) {
4596 if (!isa<CallBase>(
I))
4598 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
4599 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
4607unsigned MemProfContextDisambiguation::recordICPInfo(
4614 auto CandidateProfileData =
4615 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
4617 if (CandidateProfileData.empty())
4623 bool ICPNeeded =
false;
4624 unsigned NumClones = 0;
4625 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
4626 for (
const auto &Candidate : CandidateProfileData) {
4628 auto CalleeValueInfo =
4633 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
4640 [](
unsigned CloneNo) { return CloneNo != 0; });
4650 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
4651 TotalCount, CallsiteInfoStartIndex});
4655void MemProfContextDisambiguation::performICP(
4657 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
4666 for (
auto &
Info : ICallAnalysisInfo) {
4668 auto CallsiteIndex =
Info.CallsiteInfoStartIndex;
4669 auto TotalCount =
Info.TotalCount;
4670 unsigned NumPromoted = 0;
4671 unsigned NumClones = 0;
4673 for (
auto &Candidate :
Info.CandidateProfileData) {
4674 auto &
StackNode = AllCallsites[CallsiteIndex++];
4684 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4685 if (TargetFunction ==
nullptr ||
4694 <<
"Memprof cannot promote indirect call: target with md5sum "
4695 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
4704 const char *Reason =
nullptr;
4708 <<
"Memprof cannot promote indirect call to "
4709 <<
ore::NV(
"TargetFunction", TargetFunction)
4710 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
4722 for (
unsigned J = 0; J < NumClones; J++) {
4725 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4731 TotalCount, isSamplePGO, &ORE);
4732 auto *TargetToUse = TargetFunction;
4737 cast<Function>(
M.getOrInsertFunction(
4745 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4747 <<
" promoted and assigned to call function clone "
4748 <<
ore::NV(
"Callee", TargetToUse));
4752 TotalCount -= Candidate.Count;
4758 for (
unsigned J = 0; J < NumClones; J++) {
4761 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4763 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
4766 if (TotalCount != 0)
4769 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
4774template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4775bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
4777 dbgs() <<
"CCG before cloning:\n";
4781 exportToDot(
"postbuild");
4794 dbgs() <<
"CCG after cloning:\n";
4798 exportToDot(
"cloned");
4800 bool Changed = assignFunctions();
4803 dbgs() <<
"CCG after assigning function clones:\n";
4807 exportToDot(
"clonefuncassign");
4810 printTotalSizes(
errs());
4815bool MemProfContextDisambiguation::processModule(
4822 return applyImport(M);
4835 ModuleCallsiteContextGraph CCG(M, OREGetter);
4836 return CCG.process();
4841 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
4842 if (ImportSummary) {
4852 auto ReadSummaryFile =
4854 if (!ReadSummaryFile) {
4861 if (!ImportSummaryForTestingOrErr) {
4867 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
4868 ImportSummary = ImportSummaryForTesting.get();
4877 if (!processModule(M, OREGetter))
4894 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static bool isMemProfClone(const Function &F)
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
cl::opt< unsigned > MinClonedColdBytePercent
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Lightweight error class with error context and mandatory checking.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
const char * toString(DWARFSectionKind Kind)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const