49#include <unordered_map>
54#define DEBUG_TYPE "memprof-context-disambiguation"
// Pass statistics (llvm::Statistic via the STATISTIC macro). NOTE(review):
// extraction dropped interleaved lines here; several STATISTIC(...) header
// lines (counter names) are missing and only the description strings remain.
57 "Number of function clones created during whole program analysis");
59 "Number of function clones created during ThinLTO backend");
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
67 "Number of not cold static allocations (possibly cloned) during "
69STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
// NOTE(review): "ambigous" below is a typo for "ambiguous" in the original
// statistic description string; left as-is since it is runtime-visible text.
81 "Number of unclonable ambigous allocations during ThinLTO backend");
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
85 "Number of profiled callees found via tail calls");
87 "Aggregate depth of profiled callees found via tail calls");
89 "Maximum depth of profiled callees found via tail calls");
91 "Number of profiled callees found via multiple tail call chains");
// Command-line options (llvm::cl). The cl::opt declarations themselves are
// missing from this extraction; only the cl::desc(...) description strings
// survive. They appear to control dot-file export, stage dumping,
// verification checks, test import-summary, tail-call search depth, and
// hot/cold operator new linkage.
96 cl::desc(
"Specify the path prefix of the MemProf dot files."));
100 cl::desc(
"Export graph to dot files."));
104 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
108 cl::desc(
"Perform verification checks on CallingContextGraph."));
112 cl::desc(
"Perform frequent verification checks on nodes."));
115 "memprof-import-summary",
116 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
122 cl::desc(
"Max depth to recursively search for missing "
123 "frames through tail calls."));
134 cl::desc(
"Linking with hot/cold operator new interfaces"));
// CRTP base class carrying the callsite context graph. DerivedCCG is the
// concrete graph type (Module or Index variant below), FuncTy/CallTy the
// IR- or summary-level function and call representations.
// NOTE(review): many interior lines of this class were dropped by
// extraction (original line numbers jump); member declarations below are
// fragments, and the closing brace of the class is not visible.
152template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
153class CallsiteContextGraph {
155 CallsiteContextGraph() =
default;
156 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
157 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
// Main phases: identify needed clones, then assign functions/clones.
164 void identifyClones();
171 bool assignFunctions();
177 const CallsiteContextGraph &CCG) {
183 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
185 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
187 void exportToDot(std::string Label)
const;
// FuncInfo: a (function, clone number) pair; clone 0 presumably denotes
// the original function -- TODO confirm against full source.
190 struct FuncInfo final
191 :
public std::pair<FuncTy *, unsigned > {
192 using Base = std::pair<FuncTy *, unsigned>;
193 FuncInfo(
const Base &
B) :
Base(
B) {}
194 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
195 explicit operator bool()
const {
return this->first !=
nullptr; }
196 FuncTy *func()
const {
return this->first; }
197 unsigned cloneNo()
const {
return this->second; }
// CallInfo: a (call, clone number) pair mirroring FuncInfo for callsites.
201 struct CallInfo final :
public std::pair<CallTy, unsigned > {
202 using Base = std::pair<CallTy, unsigned>;
204 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
206 explicit operator bool()
const {
return (
bool)this->first; }
207 CallTy call()
const {
return this->first; }
208 unsigned cloneNo()
const {
return this->second; }
209 void setCloneNo(
unsigned N) { this->second =
N; }
// print() fragment: emits "\t(clone N)" when a call is present.
211 if (!
operator bool()) {
217 OS <<
"\t(clone " << cloneNo() <<
")";
// ContextNode members (node declaration line itself is missing from this
// extraction): Recursive flag, union of alloc types, and shared-ptr edge
// lists in both directions.
240 bool Recursive =
false;
256 uint8_t AllocTypes = 0;
260 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
264 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
// Returns whichever edge list (callee preferred, else caller) carries the
// allocation info for this node.
268 const std::vector<std::shared_ptr<ContextEdge>> *
269 getEdgesWithAllocInfo()
const {
272 if (!CalleeEdges.empty())
274 if (!CallerEdges.empty()) {
// Fragment: counts and gathers context ids across the chosen edge list.
287 auto *Edges = getEdgesWithAllocInfo();
291 for (
auto &Edge : *Edges)
292 Count += Edge->getContextIds().size();
294 for (
auto &Edge : *Edges)
295 ContextIds.
insert(Edge->getContextIds().begin(),
296 Edge->getContextIds().end());
// Computes the union of alloc types over this node's informative edges.
302 uint8_t computeAllocType()
const {
303 auto *Edges = getEdgesWithAllocInfo();
305 return (uint8_t)AllocationType::None;
307 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
308 uint8_t
AllocType = (uint8_t)AllocationType::None;
309 for (
auto &Edge : *Edges) {
// True when no context ids remain on any informative edge.
320 bool emptyContextIds()
const {
321 auto *Edges = getEdgesWithAllocInfo();
324 for (
auto &Edge : *Edges) {
325 if (!Edge->getContextIds().empty())
// Clone bookkeeping: all clones hang off the original node (CloneOf).
332 std::vector<ContextNode *> Clones;
335 ContextNode *CloneOf =
nullptr;
337 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
339 ContextNode(
bool IsAllocation,
CallInfo C)
340 : IsAllocation(IsAllocation),
Call(
C) {}
// addClone: registers Clone under the original node, so clone lists are
// only ever kept on the original (non-clone) node.
342 void addClone(ContextNode *Clone) {
344 CloneOf->Clones.push_back(Clone);
345 Clone->CloneOf = CloneOf;
347 Clones.push_back(Clone);
349 Clone->CloneOf =
this;
353 ContextNode *getOrigNode() {
360 unsigned int ContextId);
362 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
363 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
364 void eraseCalleeEdge(
const ContextEdge *Edge);
365 void eraseCallerEdge(
const ContextEdge *Edge);
369 bool hasCall()
const {
return (
bool)
Call.call(); }
// A node is "removed" when its alloc types have been cleared to None.
375 bool isRemoved()
const {
376 assert((AllocTypes == (uint8_t)AllocationType::None) ==
378 return AllocTypes == (uint8_t)AllocationType::None;
// ContextEdge fragments: per-edge alloc-type union and owned context ids.
398 uint8_t AllocTypes = 0;
403 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t
AllocType,
406 ContextIds(
std::
move(ContextIds)) {}
// Graph maintenance / construction helper declarations (fragments).
421 void removeNoneTypeCalleeEdges(ContextNode *
Node);
423 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
429 template <
class NodeT,
class IteratorT>
430 std::vector<uint64_t>
435 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
438 template <
class NodeT,
class IteratorT>
439 void addStackNodesForMIB(ContextNode *AllocNode,
447 void updateStackNodes();
451 void handleCallsitesWithMultipleTargets();
458 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
461 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
463 using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
472 void assignStackNodesPostOrder(
484 void propagateDuplicateContextIds(
490 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
// CRTP dispatch helpers: forward to the derived graph's implementation.
498 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
509 calleesMatch(CallTy Call, EdgeIter &EI,
515 bool calleeMatchesFunc(
516 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
517 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
518 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
519 Call, Func, CallerFunc, FoundCalleeChain);
524 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
525 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
530 uint64_t getLastStackId(CallTy Call) {
531 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
// updateAllocationCall fragment: bumps the whole-program statistic for the
// chosen alloc type, then dispatches to the derived class.
536 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
537 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
542 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
543 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
549 FuncInfo cloneFunctionForCallsite(
550 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
551 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
552 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
553 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
558 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
559 unsigned CloneNo)
const {
560 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
// Node lookup declarations (defined out-of-line below).
564 ContextNode *getNodeForInst(
const CallInfo &
C);
565 ContextNode *getNodeForAlloc(
const CallInfo &
C);
566 ContextNode *getNodeForStackId(
uint64_t StackId);
// Edge-moving helpers used during cloning (declaration fragments).
589 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
590 EdgeIter *CallerEdgeI =
nullptr,
598 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
599 ContextNode *NewCallee,
600 EdgeIter *CallerEdgeI =
nullptr,
601 bool NewClone =
false,
// The graph owns all nodes; LastContextId allocates fresh context ids.
625 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
631 unsigned int LastContextId = 0;
// Convenience alias templates for the nested graph types. NOTE(review):
// the "using Name =" line of each alias was lost in extraction; only the
// template headers and the aliased nested-type names remain.
634template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
636 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
637template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
639 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
640template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
642 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
643template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
645 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
// CRTP instantiation of the graph over LLVM IR: FuncTy = Function,
// CallTy = Instruction* (inferred from the declarations below).
// NOTE(review): interior lines are missing; declarations are fragments.
648class ModuleCallsiteContextGraph
649 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
652 ModuleCallsiteContextGraph(
657 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
// IR-level implementations of the CRTP hooks used by the base class.
661 bool calleeMatchesFunc(
663 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
664 bool findProfiledCalleeThroughTailCalls(
666 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
667 bool &FoundMultipleCalleeChains);
669 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
671 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
672 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
674 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
675 std::map<CallInfo, CallInfo> &CallMap,
676 std::vector<CallInfo> &CallsWithMetadataInFunc,
679 unsigned CloneNo)
const;
// IndexCall: wraps a PointerUnion over summary-level callsite/alloc records
// so summary entries can be used where a call handle is expected.
// NOTE(review): several member lines are missing from this extraction.
688struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
690 IndexCall(std::nullptr_t) : IndexCall() {}
// operator-> returns the wrapper itself so member syntax works on values.
695 IndexCall *operator->() {
return this; }
// print() fragment: dispatches on the union's dynamic type.
700 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
703 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
// CRTP instantiation over the ThinLTO summary index: FuncTy =
// FunctionSummary, CallTy = IndexCall (inferred from declarations below).
// NOTE(review): interior lines are missing; declarations are fragments.
711class IndexCallsiteContextGraph
712 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
715 IndexCallsiteContextGraph(
// Destructor flushes synthesized CallsiteInfos (created for tail-call
// frames) back into their owning FunctionSummary.
720 ~IndexCallsiteContextGraph() {
725 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
727 for (
auto &Callsite :
I.second)
728 FS->addCallsite(*Callsite.second);
733 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
// Summary-level implementations of the CRTP hooks.
737 bool calleeMatchesFunc(
740 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
741 bool findProfiledCalleeThroughTailCalls(
743 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
744 bool &FoundMultipleCalleeChains);
745 uint64_t getLastStackId(IndexCall &Call);
746 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
748 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
751 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
752 std::map<CallInfo, CallInfo> &CallMap,
753 std::vector<CallInfo> &CallsWithMetadataInFunc,
755 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
756 unsigned CloneNo)
const;
// Maps a summary back to its ValueInfo (used e.g. for labels).
760 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
// Owns CallsiteInfos synthesized for tail-call frames, keyed by function
// then callee ValueInfo; flushed in the destructor above.
771 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
772 FunctionCalleesToSynthesizedCallsiteInfos;
// DenseMapInfo specializations so CallInfo/IndexCall can key DenseMaps.
784 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
787 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
// FieldSeparator: small helper for comma-separated printing (presumably
// prints Sep on every use after the first -- interior lines missing).
792struct FieldSeparator {
796 FieldSeparator(
const char *Sep =
", ") : Sep(Sep) {}
// allocTypeToUse fragment: collapses a NotCold|Cold mix to NotCold.
813 assert(AllocTypes != (uint8_t)AllocationType::None);
815 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
816 return AllocationType::NotCold;
// allocTypesMatch fragment: pairwise-compares saved alloc types against
// edge alloc types; None on either side is treated as a (vacuous) match.
824template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
826 const std::vector<uint8_t> &InAllocTypes,
827 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
830 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
832 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
836 if (l == (uint8_t)AllocationType::None ||
837 r->AllocTypes == (uint8_t)AllocationType::None)
839 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
// getNodeForInst: look up the node for a call -- allocation map first,
// falling back to the non-allocation callsite map.
845template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
846typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
847CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
849 ContextNode *
Node = getNodeForAlloc(
C);
853 return NonAllocationCallToContextNodeMap.lookup(
C);
// getNodeForAlloc: direct lookup in the allocation-call map.
856template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
857typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
858CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
860 return AllocationCallToContextNodeMap.lookup(
C);
// getNodeForStackId: map lookup; returns the node if the id is present
// (the not-found return is missing from this extraction).
863template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
864typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
865CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
867 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
868 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
869 return StackEntryNode->second;
// ContextNode caller-edge update (method name line missing from this
// extraction; presumably addOrUpdateCallerEdge -- TODO confirm): if an edge
// to Caller exists, add ContextId to it; otherwise create a new shared
// edge and register it on both endpoints.
873template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
874void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
876 unsigned int ContextId) {
877 for (
auto &Edge : CallerEdges) {
878 if (Edge->Caller == Caller) {
880 Edge->getContextIds().insert(ContextId);
884 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
886 CallerEdges.push_back(Edge);
887 Caller->CalleeEdges.push_back(Edge);
// removeNoneTypeCalleeEdges: erase callee edges whose alloc type collapsed
// to None (their context-id sets must already be empty), unlinking the
// reverse caller edge first.
890template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
891void CallsiteContextGraph<
892 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
893 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
895 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
896 assert(Edge->ContextIds.empty());
897 Edge->Callee->eraseCallerEdge(Edge.get());
898 EI =
Node->CalleeEdges.erase(EI);
// findEdgeFromCallee: linear scan of this node's callee edges for one
// whose Callee matches (the return statements are missing here).
904template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
905typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
906CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
907 findEdgeFromCallee(
const ContextNode *Callee) {
908 for (
const auto &Edge : CalleeEdges)
909 if (Edge->Callee == Callee)
// findEdgeFromCaller: symmetric scan over caller edges.
914template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
915typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
916CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
917 findEdgeFromCaller(
const ContextNode *Caller) {
918 for (
const auto &Edge : CallerEdges)
919 if (Edge->Caller == Caller)
// eraseCalleeEdge: find the shared_ptr wrapping Edge by raw pointer and
// erase it; asserts the edge must be present.
924template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
925void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
926 eraseCalleeEdge(
const ContextEdge *Edge) {
928 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
929 return CalleeEdge.get() == Edge;
931 assert(EI != CalleeEdges.end());
932 CalleeEdges.erase(EI);
// eraseCallerEdge: mirror of eraseCalleeEdge on the caller side.
935template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
936void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
937 eraseCallerEdge(
const ContextEdge *Edge) {
939 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
940 return CallerEdge.get() == Edge;
942 assert(EI != CallerEdges.end());
943 CallerEdges.erase(EI);
// computeAllocType: OR together the alloc types of the given context ids
// (early-exit when both Cold and NotCold are seen is implied by the
// missing lines -- TODO confirm).
946template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
947uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
950 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
951 uint8_t
AllocType = (uint8_t)AllocationType::None;
952 for (
auto Id : ContextIds) {
953 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
// intersectAllocTypesImpl: accumulate alloc types only for ids present in
// both node sets (iterates Node1Ids, membership-tests Node2Ids).
961template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
963CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
966 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
967 uint8_t
AllocType = (uint8_t)AllocationType::None;
968 for (
auto Id : Node1Ids) {
969 if (!Node2Ids.
count(Id))
971 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
// intersectAllocTypes: iterate over the smaller set for efficiency.
979template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
980uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
982 if (Node1Ids.
size() < Node2Ids.
size())
983 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
985 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
// addAllocNode: create and register a new allocation ContextNode for Call
// in function F. Alloc types start at None and are filled in per-MIB by
// addStackNodesForMIB below.
988template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
989typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
990CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
992 assert(!getNodeForAlloc(Call));
994 std::make_unique<ContextNode>(
true, Call));
995 ContextNode *AllocNode = NodeOwner.back().get();
996 AllocationCallToContextNodeMap[
Call] = AllocNode;
997 NodeToCallingFunc[AllocNode] =
F;
999 AllocNode->OrigStackOrAllocId = LastContextId;
1002 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
// addStackNodesForMIB: assign a fresh context id for this MIB, record its
// alloc type, then walk the MIB's stack frames creating/reusing a stack
// ContextNode per frame (edge wiring lines are missing here).
1007template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1008template <
class NodeT,
class IteratorT>
1009void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1017 ContextIdToAllocationType[++LastContextId] =
AllocType;
1020 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1025 ContextNode *PrevNode = AllocNode;
1032 ContextIter != StackContext.
end(); ++ContextIter) {
1033 auto StackId = getStackId(*ContextIter);
1034 ContextNode *
StackNode = getNodeForStackId(StackId);
1036 NodeOwner.push_back(
1037 std::make_unique<ContextNode>(
false));
1039 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1040 StackNode->OrigStackOrAllocId = StackId;
// duplicateContextIds: mint a fresh context id for each id in
// StackSequenceContextIds, recording the old->new mapping and copying the
// old id's allocation type to the new id.
1051template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1053CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1057 for (
auto OldId : StackSequenceContextIds) {
1058 NewContextIds.
insert(++LastContextId);
1059 OldToNewContextIds[OldId].insert(LastContextId);
1060 assert(ContextIdToAllocationType.count(OldId));
1062 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1064 return NewContextIds;
// propagateDuplicateContextIds: starting from each allocation node, walk
// caller edges upward (recursive lambda, edge-visited set) adding the new
// duplicated ids to every edge whose id set contained an old id.
1067template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1068void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1069 propagateDuplicateContextIds(
// Collect which of ContextIds have duplicates, mapped to the new ids.
1074 for (
auto Id : ContextIds)
1075 if (
auto NewId = OldToNewContextIds.find(Id);
1076 NewId != OldToNewContextIds.end())
1077 NewIds.
insert(NewId->second.begin(), NewId->second.end())
1082 auto UpdateCallers = [&](ContextNode *
Node,
1084 auto &&UpdateCallers) ->
void {
1085 for (
const auto &Edge :
Node->CallerEdges) {
1086 auto Inserted = Visited.insert(Edge.get());
1089 ContextNode *NextNode = Edge->Caller;
// Only recurse when this edge actually carried ids that were duplicated.
1093 if (!NewIdsToAdd.
empty()) {
1094 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1095 UpdateCallers(NextNode, Visited, UpdateCallers);
1101 for (
auto &Entry : AllocationCallToContextNodeMap) {
1103 UpdateCallers(
Node, Visited, UpdateCallers);
// connectNewNode: splice NewNode into the graph next to OrigNode. For each
// of OrigNode's edges (callee side if TowardsCallee, else caller side),
// the ids in RemainingContextIds are moved onto a new edge from/to
// NewNode; edges left with no ids are unlinked and erased.
1107template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1108void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1109 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1114 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1116 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
// Split this edge's ids: those in RemainingContextIds move to the new
// edge; the rest stay behind for later edges.
1122 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1123 NotFoundContextIds);
1124 RemainingContextIds.
swap(NotFoundContextIds);
1126 if (NewEdgeContextIds.
empty()) {
1130 if (TowardsCallee) {
1131 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1132 auto NewEdge = std::make_shared<ContextEdge>(
1133 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1134 NewNode->CalleeEdges.push_back(NewEdge);
1135 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1137 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1138 auto NewEdge = std::make_shared<ContextEdge>(
1139 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1140 NewNode->CallerEdges.push_back(NewEdge);
1141 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
// Remove the original edge if it lost all of its context ids.
1144 if (Edge->getContextIds().empty()) {
1145 if (TowardsCallee) {
1146 Edge->Callee->eraseCallerEdge(Edge.get());
1147 EI = OrigNode->CalleeEdges.erase(EI);
1149 Edge->Caller->eraseCalleeEdge(Edge.get());
1150 EI = OrigNode->CallerEdges.erase(EI);
// checkEdge: debug invariant -- a live edge must have a non-None alloc
// type and a non-empty context-id set.
1158template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1160 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1163 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1164 assert(!Edge->ContextIds.empty());
// checkNode: debug invariant -- a node's context ids must equal the union
// of its caller-edge ids (with an exception visible only in the missing
// lines) and exactly the union of its callee-edge ids. Each edge is also
// checked via checkEdge when CheckEdges is set.
1167template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1169 bool CheckEdges =
true) {
1170 if (
Node->isRemoved())
1174 auto NodeContextIds =
Node->getContextIds();
1178 if (
Node->CallerEdges.size()) {
1180 Node->CallerEdges.front()->ContextIds);
1183 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1184 set_union(CallerEdgeContextIds, Edge->ContextIds);
1188 assert(NodeContextIds == CallerEdgeContextIds ||
1191 if (
Node->CalleeEdges.size()) {
1193 Node->CalleeEdges.front()->ContextIds);
1196 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1197 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1199 assert(NodeContextIds == CalleeEdgeContextIds);
// assignStackNodesPostOrder: post-order walk from Node up the caller
// edges; at each stack node with matching profiled calls, either assign
// the single call directly to the node, or (multi-call case) build a new
// ContextNode per call spanning its stack-id sequence, moving the saved
// context ids onto it and pruning the now-empty edges.
1203template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1204void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1205 assignStackNodesPostOrder(ContextNode *
Node,
1208 &StackIdToMatchingCalls) {
// Iterate over a copy: recursion below may mutate Node->CallerEdges.
1216 auto CallerEdges =
Node->CallerEdges;
1217 for (
auto &Edge : CallerEdges) {
1221 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
1230 if (
Node->IsAllocation ||
1231 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1234 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
// Fast path: a single matching call with a single stack id maps straight
// onto this node (unless the node is recursive).
1238 if (Calls.size() == 1) {
1239 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1240 if (Ids.size() == 1) {
1241 assert(SavedContextIds.empty());
1244 if (
Node->Recursive)
1246 Node->setCall(Call);
1247 NonAllocationCallToContextNodeMap[
Call] =
Node;
1256 ContextNode *LastNode = getNodeForStackId(LastId);
// General case: one new node per call, spanning Ids.front()..Ids.back().
1260 for (
unsigned I = 0;
I < Calls.size();
I++) {
1261 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
// Empty SavedContextIds means this call was skipped during matching.
1264 if (SavedContextIds.empty())
1267 assert(LastId == Ids.back());
1269 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
// Verify the node chain for this id sequence still exists, trimming
// SavedContextIds if an edge has gone missing.
1279 ContextNode *PrevNode =
nullptr;
1280 for (
auto Id : Ids) {
1281 ContextNode *CurNode = getNodeForStackId(Id);
1285 assert(!CurNode->Recursive);
1290 auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
1292 SavedContextIds.clear();
1299 if (SavedContextIds.empty())
1302 if (SavedContextIds.empty())
// Create the replacement node for this call and hook it up to the
// first/last nodes of the sequence via connectNewNode.
1306 NodeOwner.push_back(
1307 std::make_unique<ContextNode>(
false, Call));
1308 ContextNode *NewNode = NodeOwner.back().get();
1309 NodeToCallingFunc[NewNode] =
Func;
1310 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1311 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1316 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1321 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
// Strip the moved ids from the original per-frame chain, erasing edges
// that become empty and recomputing each node's alloc type.
1326 for (
auto Id : Ids) {
1327 ContextNode *CurNode = getNodeForStackId(Id);
1334 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1336 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1337 if (PrevEdge->getContextIds().empty()) {
1338 PrevNode->eraseCallerEdge(PrevEdge);
1339 CurNode->eraseCalleeEdge(PrevEdge);
1345 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1346 ? (uint8_t)AllocationType::None
1347 : CurNode->computeAllocType();
// Optional verification of the new node and the touched chain nodes.
1351 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1352 for (
auto Id : Ids) {
1353 ContextNode *CurNode = getNodeForStackId(Id);
1356 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
// updateStackNodes: match profiled (non-allocation) calls to stack-id
// chains in the graph. Phase 1 buckets each call by the last stack id of
// its callsite-context chain; phase 2 computes, per call, the context ids
// that flow through its full id sequence (duplicating ids when multiple
// calls share a sequence); phase 3 rewrites nodes via a post-order pass.
1362template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1363void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1372 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1373 for (
auto &Call : CallsWithMetadata) {
// Allocation calls already have their own nodes; skip them here.
1375 if (AllocationCallToContextNodeMap.count(Call))
1377 auto StackIdsWithContextNodes =
1378 getStackIdsWithContextNodesForCall(
Call.call());
1381 if (StackIdsWithContextNodes.empty())
1385 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1386 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1397 for (
auto &It : StackIdToMatchingCalls) {
1398 auto &Calls = It.getSecond();
// Single call with a single id: handled directly in the post-order pass.
1400 if (Calls.size() == 1) {
1401 auto &Ids = std::get<1>(Calls[0]);
1402 if (Ids.size() == 1)
// Sort so longer id sequences come first; equal lengths are ordered so
// identical sequences end up adjacent (detected as duplicates below).
1411 std::stable_sort(Calls.begin(), Calls.end(),
1412 [](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1413 auto &IdsA = std::get<1>(A);
1414 auto &IdsB = std::get<1>(B);
1415 return IdsA.size() > IdsB.size() ||
1416 (IdsA.size() == IdsB.size() && IdsA < IdsB);
1423 ContextNode *LastNode = getNodeForStackId(LastId);
1427 if (LastNode->Recursive)
1435 for (
unsigned I = 0;
I < Calls.size();
I++) {
1436 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1437 assert(SavedContextIds.empty());
1438 assert(LastId == Ids.back());
// Walk the id sequence from last to first, intersecting the context ids
// along the chain's caller edges.
1446 ContextNode *PrevNode = LastNode;
1447 ContextNode *CurNode = LastNode;
1452 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1454 CurNode = getNodeForStackId(Id);
1458 if (CurNode->Recursive) {
1463 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1481 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1484 if (StackSequenceContextIds.
empty()) {
// If the call's own context doesn't end at LastNode, drop ids that
// continue past it (they belong to longer sequences).
1497 if (Ids.back() != getLastStackId(Call)) {
1498 for (
const auto &PE : LastNode->CallerEdges) {
1499 set_subtract(StackSequenceContextIds, PE->getContextIds());
1500 if (StackSequenceContextIds.
empty())
1504 if (StackSequenceContextIds.
empty())
// Duplicate the ids when the next call has the identical id sequence.
1510 bool DuplicateContextIds =
false;
1511 if (
I + 1 < Calls.size()) {
1512 auto NextIds = std::get<1>(Calls[
I + 1]);
1513 DuplicateContextIds = Ids == NextIds;
1522 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1523 StackSequenceContextIds.
size());
1526 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1527 : StackSequenceContextIds;
1528 assert(!SavedContextIds.empty());
1530 if (!DuplicateContextIds) {
1534 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1535 if (LastNodeContextIds.
empty())
// Propagate duplicated ids up the graph, then perform the rewrite.
1542 propagateDuplicateContextIds(OldToNewContextIds);
1553 for (
auto &Entry : AllocationCallToContextNodeMap)
1554 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls);
// ModuleCallsiteContextGraph::getLastStackId fragment (its signature line
// is missing from this extraction): reads the !callsite metadata and
// returns the last frame id of the callsite context.
1561 Call->getMetadata(LLVMContext::MD_callsite));
1562 return CallsiteContext.
back();
// Index variant: same, but frame values are indices into the summary
// index's stack-id table, so translate via getStackIdAtIndex.
1565uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1568 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1570 return Index.getStackIdAtIndex(CallsiteContext.
back());
// IR-level dot label: "caller -> callee" function names.
1583std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1585 unsigned CloneNo)
const {
1586 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1587 cast<CallBase>(Call)->getCalledFunction()->getName())
// Summary-level dot label: resolves Func via FSToVIMap; alloc records get
// a "-> alloc" suffix, callsites get the clone's callee.
1591std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1592 const IndexCall &Call,
1593 unsigned CloneNo)
const {
1594 auto VI = FSToVIMap.find(Func);
1595 assert(VI != FSToVIMap.end());
1596 if (isa<AllocInfo *>(
Call.getBase()))
1597 return (
VI->second.name() +
" -> alloc").str();
1599 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(
Call.getBase());
1600 return (
VI->second.name() +
" -> " +
1602 Callsite->Clones[CloneNo]))
// Per-variant wrappers extracting the callsite context, then delegating
// to the shared getStackIdsWithContextNodes template.
1607std::vector<uint64_t>
1608ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1611 Call->getMetadata(LLVMContext::MD_callsite));
1612 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1616std::vector<uint64_t>
1617IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1620 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
// Shared implementation: collect stack ids (leaf-first) for as long as a
// ContextNode exists for each id; stops at the first id with no node
// (the early-exit line is missing from this extraction).
1626template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1627template <
class NodeT,
class IteratorT>
1628std::vector<uint64_t>
1629CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1631 std::vector<uint64_t> StackIds;
1632 for (
auto IdOrIndex : CallsiteContext) {
1633 auto StackId = getStackId(IdOrIndex);
1634 ContextNode *
Node = getNodeForStackId(StackId);
1637 StackIds.push_back(StackId);
// ModuleCallsiteContextGraph constructor: scan every instruction for
// !memprof / !callsite metadata. Allocations get an alloc node plus stack
// nodes per MIB; plain callsites are just collected. Memprof metadata is
// consumed (cleared) for allocations immediately; !callsite metadata for
// collected calls is cleared at the end, after the graph is built.
1642ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1645 :
Mod(
M), OREGetter(OREGetter) {
1647 std::vector<CallInfo> CallsWithMetadata;
1648 for (
auto &BB :
F) {
1649 for (
auto &
I : BB) {
1650 if (!isa<CallBase>(
I))
1652 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1653 CallsWithMetadata.push_back(&
I);
1654 auto *AllocNode = addAllocNode(&
I, &
F);
1655 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
// One MIB operand per profiled allocation context.
1659 for (
auto &MDOp : MemProfMD->operands()) {
1660 auto *MIBMD = cast<const MDNode>(MDOp);
1664 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1665 AllocNode, StackContext, CallsiteContext,
1668 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Memprof/callsite metadata on allocations is no longer needed.
1671 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
1672 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
1675 else if (
I.getMetadata(LLVMContext::MD_callsite))
1676 CallsWithMetadata.push_back(&
I);
1679 if (!CallsWithMetadata.empty())
1680 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
1684 dbgs() <<
"CCG before updating call stack chains:\n";
1689 exportToDot(
"prestackupdate");
1693 handleCallsitesWithMultipleTargets();
// Strip remaining !callsite metadata now that matching is complete.
1696 for (
auto &FuncEntry : FuncToCallsWithMetadata)
1697 for (
auto &Call : FuncEntry.second)
1698 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// IndexCallsiteContextGraph constructor: walk the summary index, skipping
// non-prevailing copies; build alloc nodes and per-MIB stack nodes from
// each FunctionSummary's alloc records, and collect callsite records.
1701IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1708 for (
auto &S :
VI.getSummaryList()) {
1717 !isPrevailing(
VI.getGUID(), S.get()))
1719 auto *
FS = dyn_cast<FunctionSummary>(S.get());
1722 std::vector<CallInfo> CallsWithMetadata;
1723 if (!
FS->allocs().empty()) {
1724 for (
auto &AN :
FS->mutableAllocs()) {
// Allocations with no MIBs carry no context info; skip them.
1729 if (AN.MIBs.empty())
1731 CallsWithMetadata.push_back({&AN});
1732 auto *AllocNode = addAllocNode({&AN},
FS);
1739 for (
auto &MIB : AN.MIBs) {
1742 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1743 AllocNode, StackContext, EmptyContext, MIB.AllocType);
1745 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Initialize version 0 to the single (collapsed) alloc type.
1750 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
1754 if (!
FS->callsites().empty())
1755 for (
auto &SN :
FS->mutableCallsites())
1756 CallsWithMetadata.push_back({&SN});
1758 if (!CallsWithMetadata.empty())
1759 FuncToCallsWithMetadata[FS] = CallsWithMetadata;
1761 if (!
FS->allocs().empty() || !
FS->callsites().empty())
1767 dbgs() <<
"CCG before updating call stack chains:\n";
1772 exportToDot(
"prestackupdate");
1776 handleCallsitesWithMultipleTargets();
// handleCallsitesWithMultipleTargets: for every matched callsite node,
// verify each callee edge actually targets the function called in the IR
// (or is reachable via tail calls); mismatched edges are removed and
// counted. Nodes left with no call are dropped from the map, and nodes
// synthesized for tail-call frames are added to it.
1779template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1780void CallsiteContextGraph<DerivedCCG, FuncTy,
1781 CallTy>::handleCallsitesWithMultipleTargets() {
1796 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
1801 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
// Edges to nodes without a call cannot be validated here.
1803 if (!Edge->Callee->hasCall()) {
1807 assert(NodeToCallingFunc.count(Edge->Callee));
1809 if (calleesMatch(Call, EI, TailCallToContextNodeMap))
1811 RemovedEdgesWithMismatchedCallees++;
1824 NonAllocationCallToContextNodeMap.remove_if(
1825 [](
const auto &it) {
return !it.second->hasCall(); });
1829 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
1830 NonAllocationCallToContextNodeMap[
Call] =
Node;
// IndexCallsiteContextGraph::getStackId fragment (signature missing):
// translate a summary stack-id-table index to the actual stack id.
1841 return Index.getStackIdAtIndex(IdOrIndex);
1844template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1845bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
1846 CallTy Call, EdgeIter &EI,
1849 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
1850 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
1853 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
1854 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
1855 FoundCalleeChain)) {
1861 if (FoundCalleeChain.empty()) {
1866 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
1867 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
1871 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
1872 Edge->ContextIds.end());
1873 CurEdge->AllocTypes |= Edge->AllocTypes;
1878 auto NewEdge = std::make_shared<ContextEdge>(
1879 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
1880 Callee->CallerEdges.push_back(NewEdge);
1881 if (Caller == Edge->Caller) {
1885 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
1888 "Iterator position not restored after insert and increment");
1890 Caller->CalleeEdges.push_back(NewEdge);
1895 auto *CurCalleeNode = Edge->Callee;
1896 for (
auto &[NewCall, Func] : FoundCalleeChain) {
1897 ContextNode *NewNode =
nullptr;
1899 if (TailCallToContextNodeMap.
count(NewCall)) {
1900 NewNode = TailCallToContextNodeMap[NewCall];
1901 NewNode->AllocTypes |= Edge->AllocTypes;
1903 FuncToCallsWithMetadata[
Func].push_back({NewCall});
1905 NodeOwner.push_back(
1906 std::make_unique<ContextNode>(
false, NewCall));
1907 NewNode = NodeOwner.
back().get();
1908 NodeToCallingFunc[NewNode] =
Func;
1909 TailCallToContextNodeMap[NewCall] = NewNode;
1910 NewNode->AllocTypes = Edge->AllocTypes;
1914 AddEdge(NewNode, CurCalleeNode);
1916 CurCalleeNode = NewNode;
1920 AddEdge(Edge->Caller, CurCalleeNode);
1923 Edge->Callee->eraseCallerEdge(Edge.get());
1924 EI = Edge->Caller->CalleeEdges.erase(EI);
1929bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
1931 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
1932 bool &FoundMultipleCalleeChains) {
1939 FoundCalleeChain.push_back({Callsite,
F});
1942 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
1944 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
1946 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
1954 bool FoundSingleCalleeChain =
false;
1955 for (
auto &BB : *CalleeFunc) {
1956 for (
auto &
I : BB) {
1957 auto *CB = dyn_cast<CallBase>(&
I);
1958 if (!CB || !CB->isTailCall())
1960 auto *CalledValue = CB->getCalledOperand();
1961 auto *CalledFunction = CB->getCalledFunction();
1962 if (CalledValue && !CalledFunction) {
1963 CalledValue = CalledValue->stripPointerCasts();
1965 CalledFunction = dyn_cast<Function>(CalledValue);
1969 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
1970 assert(!CalledFunction &&
1971 "Expected null called function in callsite for alias");
1972 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
1974 if (!CalledFunction)
1976 if (CalledFunction == ProfiledCallee) {
1977 if (FoundSingleCalleeChain) {
1978 FoundMultipleCalleeChains =
true;
1981 FoundSingleCalleeChain =
true;
1982 FoundProfiledCalleeCount++;
1983 FoundProfiledCalleeDepth +=
Depth;
1984 if (
Depth > FoundProfiledCalleeMaxDepth)
1985 FoundProfiledCalleeMaxDepth =
Depth;
1986 SaveCallsiteInfo(&
I, CalleeFunc);
1987 }
else if (findProfiledCalleeThroughTailCalls(
1988 ProfiledCallee, CalledFunction,
Depth + 1,
1989 FoundCalleeChain, FoundMultipleCalleeChains)) {
1992 assert(!FoundMultipleCalleeChains);
1993 if (FoundSingleCalleeChain) {
1994 FoundMultipleCalleeChains =
true;
1997 FoundSingleCalleeChain =
true;
1998 SaveCallsiteInfo(&
I, CalleeFunc);
1999 }
else if (FoundMultipleCalleeChains)
2004 return FoundSingleCalleeChain;
2007bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2009 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2010 auto *CB = dyn_cast<CallBase>(Call);
2011 if (!CB->getCalledOperand())
2013 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2014 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2015 if (CalleeFunc == Func)
2017 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2018 if (Alias && Alias->getAliasee() == Func)
2029 bool FoundMultipleCalleeChains =
false;
2030 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2032 FoundMultipleCalleeChains)) {
2033 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2034 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2035 <<
" that actually called " << CalleeVal->getName()
2036 << (FoundMultipleCalleeChains
2037 ?
" (found multiple possible chains)"
2040 if (FoundMultipleCalleeChains)
2041 FoundProfiledCalleeNonUniquelyCount++;
2048bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2050 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2051 bool &FoundMultipleCalleeChains) {
2060 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2061 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2064 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2067 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2068 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2075 bool FoundSingleCalleeChain =
false;
2078 !isPrevailing(CurCallee.
getGUID(), S.get()))
2080 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2083 auto FSVI = CurCallee;
2084 auto *AS = dyn_cast<AliasSummary>(S.get());
2086 FSVI = AS->getAliaseeVI();
2087 for (
auto &CallEdge :
FS->calls()) {
2088 if (!CallEdge.second.hasTailCall())
2090 if (CallEdge.first == ProfiledCallee) {
2091 if (FoundSingleCalleeChain) {
2092 FoundMultipleCalleeChains =
true;
2095 FoundSingleCalleeChain =
true;
2096 FoundProfiledCalleeCount++;
2097 FoundProfiledCalleeDepth +=
Depth;
2098 if (
Depth > FoundProfiledCalleeMaxDepth)
2099 FoundProfiledCalleeMaxDepth =
Depth;
2100 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2102 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2103 FSToVIMap[
FS] = FSVI;
2104 }
else if (findProfiledCalleeThroughTailCalls(
2105 ProfiledCallee, CallEdge.first,
Depth + 1,
2106 FoundCalleeChain, FoundMultipleCalleeChains)) {
2109 assert(!FoundMultipleCalleeChains);
2110 if (FoundSingleCalleeChain) {
2111 FoundMultipleCalleeChains =
true;
2114 FoundSingleCalleeChain =
true;
2115 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2117 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2118 FSToVIMap[
FS] = FSVI;
2119 }
else if (FoundMultipleCalleeChains)
2124 return FoundSingleCalleeChain;
2127bool IndexCallsiteContextGraph::calleeMatchesFunc(
2130 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2132 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
2136 Callee.getSummaryList().empty()
2138 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2139 assert(FSToVIMap.count(Func));
2140 auto FuncVI = FSToVIMap[
Func];
2141 if (Callee == FuncVI ||
2156 bool FoundMultipleCalleeChains =
false;
2157 if (!findProfiledCalleeThroughTailCalls(
2158 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2159 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2160 <<
" from " << FSToVIMap[CallerFunc]
2161 <<
" that actually called " << Callee
2162 << (FoundMultipleCalleeChains
2163 ?
" (found multiple possible chains)"
2166 if (FoundMultipleCalleeChains)
2167 FoundProfiledCalleeNonUniquelyCount++;
2178 if (AllocTypes & (uint8_t)AllocationType::NotCold)
2180 if (AllocTypes & (uint8_t)AllocationType::Cold)
2185template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2186void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2192template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2193void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2195 OS <<
"Node " <<
this <<
"\n";
2199 OS <<
" (recursive)";
2202 OS <<
"\tContextIds:";
2204 auto ContextIds = getContextIds();
2205 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2206 std::sort(SortedIds.begin(), SortedIds.end());
2207 for (
auto Id : SortedIds)
2210 OS <<
"\tCalleeEdges:\n";
2211 for (
auto &Edge : CalleeEdges)
2212 OS <<
"\t\t" << *Edge <<
"\n";
2213 OS <<
"\tCallerEdges:\n";
2214 for (
auto &Edge : CallerEdges)
2215 OS <<
"\t\t" << *Edge <<
"\n";
2216 if (!Clones.empty()) {
2219 for (
auto *Clone : Clones)
2222 }
else if (CloneOf) {
2223 OS <<
"\tClone of " << CloneOf <<
"\n";
2227template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2228void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
2234template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2235void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2239 OS <<
" ContextIds:";
2240 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2241 std::sort(SortedIds.begin(), SortedIds.end());
2242 for (
auto Id : SortedIds)
2246template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2247void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
2251template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2252void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2254 OS <<
"Callsite Context Graph:\n";
2255 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2256 for (
const auto Node : nodes<GraphType>(
this)) {
2257 if (
Node->isRemoved())
2264template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2265void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2266 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2267 for (
const auto Node : nodes<GraphType>(
this)) {
2268 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2269 for (
auto &Edge :
Node->CallerEdges)
2270 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
2274template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2276 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2277 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2279 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
2295 return G->NodeOwner.begin()->get();
2298 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2299 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2307 decltype(&GetCallee)>;
2318template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2323 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2329 std::string LabelString =
2330 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2331 Twine(Node->OrigStackOrAllocId))
2333 LabelString +=
"\n";
2334 if (Node->hasCall()) {
2335 auto Func =
G->NodeToCallingFunc.find(Node);
2336 assert(Func !=
G->NodeToCallingFunc.end());
2338 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2340 LabelString +=
"null call";
2341 if (Node->Recursive)
2342 LabelString +=
" (recursive)";
2344 LabelString +=
" (external)";
2351 getContextIds(Node->getContextIds()) +
"\"")
2354 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2356 if (Node->CloneOf) {
2366 auto &Edge = *(ChildIter.getCurrent());
2367 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2368 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
2375 return Node->isRemoved();
2380 std::string IdString =
"ContextIds:";
2381 if (ContextIds.
size() < 100) {
2382 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2383 std::sort(SortedIds.begin(), SortedIds.end());
2384 for (
auto Id : SortedIds)
2385 IdString += (
" " +
Twine(Id)).str();
2387 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
2392 static std::string getColor(uint8_t AllocTypes) {
2401 return "mediumorchid1";
2405 static std::string getNodeId(NodeRef
Node) {
2406 std::stringstream SStream;
2407 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
2408 std::string
Result = SStream.str();
2413template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2414void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
2415 std::string Label)
const {
2420template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2421typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
2422CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
2423 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
2425 ContextNode *
Node = Edge->Callee;
2426 NodeOwner.push_back(
2427 std::make_unique<ContextNode>(
Node->IsAllocation,
Node->Call));
2428 ContextNode *Clone = NodeOwner.back().get();
2429 Node->addClone(Clone);
2431 NodeToCallingFunc[Clone] = NodeToCallingFunc[
Node];
2432 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true,
2437template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2438void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2439 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
2440 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
2445 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
2447 ContextNode *OldCallee = Edge->Callee;
2451 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
2455 if (ContextIdsToMove.
empty())
2456 ContextIdsToMove = Edge->getContextIds();
2460 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
2463 *CallerEdgeI = OldCallee->CallerEdges.
erase(*CallerEdgeI);
2465 OldCallee->eraseCallerEdge(Edge.get());
2466 if (ExistingEdgeToNewCallee) {
2469 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2470 ContextIdsToMove.
end());
2471 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
2472 assert(Edge->ContextIds == ContextIdsToMove);
2473 Edge->ContextIds.clear();
2475 Edge->Caller->eraseCalleeEdge(Edge.get());
2478 Edge->Callee = NewCallee;
2479 NewCallee->CallerEdges.push_back(Edge);
2484 NewCallee->AllocTypes |= Edge->AllocTypes;
2490 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
2491 if (ExistingEdgeToNewCallee) {
2494 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2495 ContextIdsToMove.
end());
2496 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
2499 auto NewEdge = std::make_shared<ContextEdge>(
2500 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
2501 Edge->Caller->CalleeEdges.push_back(NewEdge);
2502 NewCallee->CallerEdges.push_back(NewEdge);
2506 NewCallee->AllocTypes |= CallerEdgeAllocType;
2508 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
2513 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
2518 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
2519 OldCalleeEdge->AllocTypes =
2520 computeAllocType(OldCalleeEdge->getContextIds());
2527 if (
auto *NewCalleeEdge =
2528 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
2529 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
2530 EdgeContextIdsToMove.
end());
2531 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
2535 auto NewEdge = std::make_shared<ContextEdge>(
2536 OldCalleeEdge->Callee, NewCallee,
2537 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
2538 NewCallee->CalleeEdges.push_back(NewEdge);
2539 NewEdge->Callee->CallerEdges.push_back(NewEdge);
2543 OldCallee->AllocTypes = OldCallee->computeAllocType();
2546 OldCallee->emptyContextIds());
2548 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
2549 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
2550 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
2551 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
2553 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
2554 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
2559template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2560void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2561 recursivelyRemoveNoneTypeCalleeEdges(
2567 removeNoneTypeCalleeEdges(
Node);
2569 for (
auto *Clone :
Node->Clones)
2570 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
2574 auto CallerEdges =
Node->CallerEdges;
2575 for (
auto &Edge : CallerEdges) {
2577 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2581 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
2585template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2586void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
2588 for (
auto &Entry : AllocationCallToContextNodeMap) {
2590 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
2593 for (
auto &Entry : AllocationCallToContextNodeMap)
2594 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
2607template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2608void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2612 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2620 if (!
Node->hasCall())
2635 auto CallerEdges =
Node->CallerEdges;
2636 for (
auto &Edge : CallerEdges) {
2638 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2643 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
2644 identifyClones(Edge->Caller, Visited, AllocContextIds);
2667 const unsigned AllocTypeCloningPriority[] = { 3, 4,
2670 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
2671 [&](
const std::shared_ptr<ContextEdge> &
A,
2672 const std::shared_ptr<ContextEdge> &
B) {
2675 if (A->ContextIds.empty())
2681 if (B->ContextIds.empty())
2684 if (A->AllocTypes == B->AllocTypes)
2687 return *A->ContextIds.begin() < *B->ContextIds.begin();
2688 return AllocTypeCloningPriority[A->AllocTypes] <
2689 AllocTypeCloningPriority[B->AllocTypes];
2699 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
2700 auto CallerEdge = *EI;
2709 auto CallerEdgeContextsForAlloc =
2711 if (CallerEdgeContextsForAlloc.empty()) {
2715 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
2719 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
2720 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
2721 for (
auto &CalleeEdge :
Node->CalleeEdges)
2722 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2723 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
2738 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
2739 allocTypeToUse(
Node->AllocTypes) &&
2740 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2741 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
2748 ContextNode *Clone =
nullptr;
2749 for (
auto *CurClone :
Node->Clones) {
2750 if (allocTypeToUse(CurClone->AllocTypes) !=
2751 allocTypeToUse(CallerAllocTypeForAlloc))
2754 if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2755 CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
2763 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI,
false,
2764 CallerEdgeContextsForAlloc);
2767 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
2782 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2785void ModuleCallsiteContextGraph::updateAllocationCall(
2789 "memprof", AllocTypeString);
2790 cast<CallBase>(
Call.call())->addFnAttr(
A);
2791 OREGetter(
Call.call()->getFunction())
2793 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
2795 <<
" marked with memprof allocation attribute "
2796 <<
ore::NV(
"Attribute", AllocTypeString));
2799void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
2803 assert(AI->Versions.size() >
Call.cloneNo());
2807void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2808 FuncInfo CalleeFunc) {
2809 if (CalleeFunc.cloneNo() > 0)
2810 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
2811 OREGetter(CallerCall.call()->getFunction())
2813 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
2814 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
2815 <<
" assigned to call function clone "
2816 <<
ore::NV(
"Callee", CalleeFunc.func()));
2819void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2820 FuncInfo CalleeFunc) {
2821 auto *CI = CallerCall.call().dyn_cast<
CallsiteInfo *>();
2823 "Caller cannot be an allocation which should not have profiled calls");
2824 assert(CI->Clones.size() > CallerCall.cloneNo());
2825 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
2828CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
2830ModuleCallsiteContextGraph::cloneFunctionForCallsite(
2831 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2832 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2838 NewFunc->setName(
Name);
2839 for (
auto &Inst : CallsWithMetadataInFunc) {
2841 assert(Inst.cloneNo() == 0);
2842 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
2844 OREGetter(
Func.func())
2846 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
2847 return {NewFunc, CloneNo};
2851 IndexCall>::FuncInfo
2852IndexCallsiteContextGraph::cloneFunctionForCallsite(
2853 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2854 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2869 for (
auto &Inst : CallsWithMetadataInFunc) {
2871 assert(Inst.cloneNo() == 0);
2872 if (
auto *AI = Inst.call().dyn_cast<
AllocInfo *>()) {
2873 assert(AI->Versions.size() == CloneNo);
2876 AI->Versions.push_back(0);
2879 assert(CI && CI->Clones.size() == CloneNo);
2882 CI->Clones.push_back(0);
2884 CallMap[Inst] = {Inst.call(), CloneNo};
2886 return {
Func.func(), CloneNo};
2920template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2921bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
2922 bool Changed =
false;
2930 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
2931 const FuncInfo &CalleeFunc) {
2933 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
2938 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2939 FuncInfo OrigFunc(Func);
2943 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
2944 for (
auto &Call : CallsWithMetadata) {
2945 ContextNode *
Node = getNodeForInst(Call);
2952 "Not having a call should have prevented cloning");
2956 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
2960 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
2962 ContextNode *CallsiteClone,
2965 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
2967 assert(FuncClonesToCallMap.count(FuncClone));
2968 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
2970 if (CallMap.count(Call))
2971 CallClone = CallMap[
Call];
2972 CallsiteClone->setCall(CallClone);
2978 std::deque<ContextNode *> ClonesWorklist;
2980 if (!
Node->emptyContextIds())
2981 ClonesWorklist.push_back(
Node);
2982 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
2983 Node->Clones.end());
2988 unsigned NodeCloneCount = 0;
2989 while (!ClonesWorklist.empty()) {
2990 ContextNode *Clone = ClonesWorklist.front();
2991 ClonesWorklist.pop_front();
2994 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3000 if (FuncClonesToCallMap.size() < NodeCloneCount) {
3002 if (NodeCloneCount == 1) {
3007 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3008 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3012 FuncClonesToCallMap[OrigFunc] = {};
3013 AssignCallsiteCloneToFuncClone(
3014 OrigFunc, Call, Clone,
3015 AllocationCallToContextNodeMap.count(Call));
3016 for (
auto &CE : Clone->CallerEdges) {
3018 if (!
CE->Caller->hasCall())
3020 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
3030 FuncInfo PreviousAssignedFuncClone;
3032 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3033 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3035 bool CallerAssignedToCloneOfFunc =
false;
3036 if (EI != Clone->CallerEdges.end()) {
3037 const std::shared_ptr<ContextEdge> &Edge = *EI;
3038 PreviousAssignedFuncClone =
3039 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3040 CallerAssignedToCloneOfFunc =
true;
3045 std::map<CallInfo, CallInfo> NewCallMap;
3046 unsigned CloneNo = FuncClonesToCallMap.size();
3047 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3048 "should already exist in the map");
3049 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3050 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3051 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3052 FunctionClonesAnalysis++;
3058 if (!CallerAssignedToCloneOfFunc) {
3059 AssignCallsiteCloneToFuncClone(
3060 NewFuncClone, Call, Clone,
3061 AllocationCallToContextNodeMap.count(Call));
3062 for (
auto &CE : Clone->CallerEdges) {
3064 if (!
CE->Caller->hasCall())
3066 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3075 for (
auto CE : Clone->CallerEdges) {
3080 if (!
CE->Caller->hasCall())
3083 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3087 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3088 PreviousAssignedFuncClone)
3091 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3101 for (
auto CalleeEdge :
CE->Caller->CalleeEdges) {
3106 ContextNode *
Callee = CalleeEdge->Callee;
3110 if (Callee == Clone || !
Callee->hasCall())
3112 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3113 removeNoneTypeCalleeEdges(NewClone);
3116 removeNoneTypeCalleeEdges(Callee);
3121 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3122 RecordCalleeFuncOfCallsite(
3123 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
3132 OrigCall.setCloneNo(0);
3133 std::map<CallInfo, CallInfo> &CallMap =
3134 FuncClonesToCallMap[NewFuncClone];
3135 assert(CallMap.count(OrigCall));
3136 CallInfo NewCall(CallMap[OrigCall]);
3138 NewClone->setCall(NewCall);
3160 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3161 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3164 for (
auto EI = Clone->CallerEdges.begin();
3165 EI != Clone->CallerEdges.end();) {
3168 if (!Edge->Caller->hasCall()) {
3174 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3175 FuncInfo FuncCloneCalledByCaller =
3176 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3186 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3187 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3195 (FuncCloneAssignedToCurCallsiteClone &&
3196 FuncCloneAssignedToCurCallsiteClone !=
3197 FuncCloneCalledByCaller)) {
3212 if (FuncCloneToNewCallsiteCloneMap.count(
3213 FuncCloneCalledByCaller)) {
3214 ContextNode *NewClone =
3215 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
3216 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
3218 removeNoneTypeCalleeEdges(NewClone);
3221 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
3222 removeNoneTypeCalleeEdges(NewClone);
3223 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
3226 ClonesWorklist.push_back(NewClone);
3227 assert(EI == Clone->CallerEdges.end() ||
3233 removeNoneTypeCalleeEdges(Clone);
3242 if (!FuncCloneAssignedToCurCallsiteClone) {
3243 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
3245 AssignCallsiteCloneToFuncClone(
3246 FuncCloneCalledByCaller, Call, Clone,
3247 AllocationCallToContextNodeMap.count(Call));
3251 assert(FuncCloneAssignedToCurCallsiteClone ==
3252 FuncCloneCalledByCaller);
3261 if (!FuncCloneAssignedToCurCallsiteClone) {
3266 for (
auto &CF : FuncClonesToCallMap) {
3267 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
3268 FuncCloneAssignedToCurCallsiteClone = CF.first;
3272 assert(FuncCloneAssignedToCurCallsiteClone);
3274 AssignCallsiteCloneToFuncClone(
3275 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
3276 AllocationCallToContextNodeMap.count(Call));
3278 assert(FuncCloneToCurNodeCloneMap
3279 [FuncCloneAssignedToCurCallsiteClone] == Clone);
3281 RecordCalleeFuncOfCallsite(Edge->Caller,
3282 FuncCloneAssignedToCurCallsiteClone);
3289 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
3290 for (
const auto &PE :
Node->CalleeEdges)
3291 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3292 for (
const auto &CE :
Node->CallerEdges)
3293 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
3294 for (
auto *Clone :
Node->Clones) {
3295 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3296 for (
const auto &PE : Clone->CalleeEdges)
3297 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3298 for (
const auto &CE : Clone->CallerEdges)
3299 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
3305 auto UpdateCalls = [&](ContextNode *
Node,
3307 auto &&UpdateCalls) {
3312 for (
auto *Clone :
Node->Clones)
3313 UpdateCalls(Clone, Visited, UpdateCalls);
3315 for (
auto &Edge :
Node->CallerEdges)
3316 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
3320 if (!
Node->hasCall() ||
Node->emptyContextIds())
3323 if (
Node->IsAllocation) {
3324 updateAllocationCall(
Node->Call, allocTypeToUse(
Node->AllocTypes));
3328 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
3331 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
3332 updateCall(
Node->Call, CalleeFunc);
3341 for (
auto &Entry : AllocationCallToContextNodeMap)
3342 UpdateCalls(
Entry.second, Visited, UpdateCalls);
3356 FunctionsClonedThinBackend++;
3357 for (
unsigned I = 1;
I < NumClones;
I++) {
3358 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
3360 FunctionClonesThinBackend++;
3363 for (
auto &BB : *NewF) {
3364 for (
auto &Inst : BB) {
3365 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
3366 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
3370 auto *PrevF = M.getFunction(
Name);
3374 assert(PrevF->isDeclaration());
3375 NewF->takeName(PrevF);
3376 PrevF->replaceAllUsesWith(NewF);
3377 PrevF->eraseFromParent();
3379 NewF->setName(
Name);
3381 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
3384 if (!FuncToAliasMap.count(&
F))
3386 for (
auto *
A : FuncToAliasMap[&
F]) {
3388 auto *PrevA = M.getNamedAlias(
Name);
3390 A->getType()->getPointerAddressSpace(),
3391 A->getLinkage(),
Name, NewF);
3392 NewA->copyAttributesFrom(
A);
3396 assert(PrevA->isDeclaration());
3397 NewA->takeName(PrevA);
3398 PrevA->replaceAllUsesWith(NewA);
3399 PrevA->eraseFromParent();
3441bool MemProfContextDisambiguation::applyImport(
Module &M) {
3443 bool Changed =
false;
3445 auto IsMemProfClone = [](
const Function &
F) {
3452 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
3454 for (
auto &
A :
M.aliases()) {
3455 auto *Aliasee =
A.getAliaseeObject();
3456 if (
auto *
F = dyn_cast<Function>(Aliasee))
3457 FuncToAliasMap[
F].insert(&
A);
3461 if (
F.isDeclaration() || IsMemProfClone(
F))
3467 bool ClonesCreated =
false;
3468 unsigned NumClonesCreated = 0;
3469 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
3479 if (ClonesCreated) {
3480 assert(NumClonesCreated == NumClones);
3487 ClonesCreated =
true;
3488 NumClonesCreated = NumClones;
3494 CloneFuncIfNeeded(
StackNode.Clones.size());
3498 assert(!IsMemProfClone(*CalledFunction));
3503 auto CalleeOrigName = CalledFunction->getName();
3504 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
3509 auto NewF =
M.getOrInsertFunction(
3511 CalledFunction->getFunctionType());
3517 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3520 <<
ore::NV(
"Call", CBClone) <<
" in clone "
3522 <<
" assigned to call function clone "
3523 <<
ore::NV(
"Callee", NewF.getCallee()));
3541 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
3543 "enable-import-metadata is needed to emit thinlto_src_module");
3545 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
3547 if (GVS->modulePath() == SrcModule) {
3548 GVSummary = GVS.get();
3552 assert(GVSummary && GVSummary->modulePath() == SrcModule);
3557 if (isa<AliasSummary>(GVSummary))
3560 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
3562 if (
FS->allocs().empty() &&
FS->callsites().empty())
3565 auto SI =
FS->callsites().begin();
3566 auto AI =
FS->allocs().begin();
3574 for (
auto CallsiteIt =
FS->callsites().rbegin();
3575 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
3576 auto &Callsite = *CallsiteIt;
3580 if (!Callsite.StackIdIndices.empty())
3582 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
3588 for (
auto &BB :
F) {
3589 for (
auto &
I : BB) {
3590 auto *CB = dyn_cast<CallBase>(&
I);
3595 auto *CalledValue = CB->getCalledOperand();
3596 auto *CalledFunction = CB->getCalledFunction();
3597 if (CalledValue && !CalledFunction) {
3598 CalledValue = CalledValue->stripPointerCasts();
3600 CalledFunction = dyn_cast<Function>(CalledValue);
3604 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
3605 assert(!CalledFunction &&
3606 "Expected null called function in callsite for alias");
3607 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
3611 I.getMetadata(LLVMContext::MD_callsite));
3612 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
3616 if (CB->getAttributes().hasFnAttr(
"memprof")) {
3618 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3619 ? AllocTypeColdThinBackend++
3620 : AllocTypeNotColdThinBackend++;
3621 OrigAllocsThinBackend++;
3622 AllocVersionsThinBackend++;
3623 if (!MaxAllocVersionsThinBackend)
3624 MaxAllocVersionsThinBackend = 1;
3627 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3634 auto &AllocNode = *(AI++);
3638 auto MIBIter = AllocNode.MIBs.begin();
3639 for (
auto &MDOp : MemProfMD->operands()) {
3640 assert(MIBIter != AllocNode.MIBs.end());
3642 MIBIter->StackIdIndices.begin();
3643 auto *MIBMD = cast<const MDNode>(MDOp);
3647 auto ContextIterBegin =
3651 (ContextIterBegin != StackContext.
end() &&
3652 *ContextIterBegin == 0)
3655 for (
auto ContextIter = ContextIterBegin;
3656 ContextIter != StackContext.
end(); ++ContextIter) {
3660 if (LastStackContextId == *ContextIter)
3662 LastStackContextId = *ContextIter;
3663 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
3672 CloneFuncIfNeeded(AllocNode.Versions.size());
3674 OrigAllocsThinBackend++;
3675 AllocVersionsThinBackend += AllocNode.Versions.size();
3676 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
3677 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
3684 if (AllocNode.Versions.size() == 1) {
3689 UnclonableAllocsThinBackend++;
3695 return Type == ((uint8_t)AllocationType::NotCold |
3696 (uint8_t)AllocationType::Cold);
3700 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
3706 : AllocTypeNotColdThinBackend++;
3718 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3721 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
3723 <<
" marked with memprof allocation attribute "
3724 <<
ore::NV(
"Attribute", AllocTypeString));
3726 }
else if (!CallsiteContext.empty()) {
3728 assert(SI !=
FS->callsites().end());
3734 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
3735 for (
auto StackId : CallsiteContext) {
3743 CloneCallsite(
StackNode, CB, CalledFunction);
3744 }
else if (CB->isTailCall()) {
3749 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
3750 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
3751 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
3752 CloneCallsite(Callsite->second, CB, CalledFunction);
3756 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
3757 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3765template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3766bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
3768 dbgs() <<
"CCG before cloning:\n";
3772 exportToDot(
"postbuild");
3785 dbgs() <<
"CCG after cloning:\n";
3789 exportToDot(
"cloned");
3791 bool Changed = assignFunctions();
3794 dbgs() <<
"CCG after assigning function clones:\n";
3798 exportToDot(
"clonefuncassign");
3803bool MemProfContextDisambiguation::processModule(
3810 return applyImport(M);
3823 ModuleCallsiteContextGraph CCG(M, OREGetter);
3824 return CCG.process();
3829 : ImportSummary(Summary) {
3830 if (ImportSummary) {
3840 auto ReadSummaryFile =
3842 if (!ReadSummaryFile) {
3849 if (!ImportSummaryForTestingOrErr) {
3855 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
3856 ImportSummary = ImportSummaryForTesting.get();
3865 if (!processModule(M, OREGetter))
3882 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
std::pair< KeyT, ValueT > & back()
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
StringRef AttributeString(unsigned Attribute)
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const