49#include <unordered_map>
54#define DEBUG_TYPE "memprof-context-disambiguation"
57 "Number of function clones created during whole program analysis");
59 "Number of function clones created during ThinLTO backend");
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
67 "Number of not cold static allocations (possibly cloned) during "
69STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
81 "Number of unclonable ambigous allocations during ThinLTO backend");
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
85 "Number of profiled callees found via tail calls");
87 "Aggregate depth of profiled callees found via tail calls");
89 "Maximum depth of profiled callees found via tail calls");
91 "Number of profiled callees found via multiple tail call chains");
96 cl::desc(
"Specify the path prefix of the MemProf dot files."));
100 cl::desc(
"Export graph to dot files."));
104 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
108 cl::desc(
"Perform verification checks on CallingContextGraph."));
112 cl::desc(
"Perform frequent verification checks on nodes."));
115 "memprof-import-summary",
116 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
122 cl::desc(
"Max depth to recursively search for missing "
123 "frames through tail calls."));
134 cl::desc(
"Linking with hot/cold operator new interfaces"));
154template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
155class CallsiteContextGraph {
157 CallsiteContextGraph() =
default;
158 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
159 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
166 void identifyClones();
173 bool assignFunctions();
180 const CallsiteContextGraph &CCG) {
186 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
188 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
190 void exportToDot(std::string Label)
const;
193 struct FuncInfo final
194 :
public std::pair<FuncTy *, unsigned > {
195 using Base = std::pair<FuncTy *, unsigned>;
196 FuncInfo(
const Base &
B) :
Base(
B) {}
197 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
198 explicit operator bool()
const {
return this->first !=
nullptr; }
199 FuncTy *func()
const {
return this->first; }
200 unsigned cloneNo()
const {
return this->second; }
204 struct CallInfo final :
public std::pair<CallTy, unsigned > {
205 using Base = std::pair<CallTy, unsigned>;
207 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
209 explicit operator bool()
const {
return (
bool)this->first; }
210 CallTy call()
const {
return this->first; }
211 unsigned cloneNo()
const {
return this->second; }
212 void setCloneNo(
unsigned N) { this->second =
N; }
214 if (!
operator bool()) {
220 OS <<
"\t(clone " << cloneNo() <<
")";
243 bool Recursive =
false;
253 std::vector<CallInfo> MatchingCalls;
266 uint8_t AllocTypes = 0;
270 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
274 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
278 const std::vector<std::shared_ptr<ContextEdge>> *
279 getEdgesWithAllocInfo()
const {
282 if (!CalleeEdges.empty())
284 if (!CallerEdges.empty()) {
297 auto *Edges = getEdgesWithAllocInfo();
301 for (
auto &Edge : *Edges)
302 Count += Edge->getContextIds().size();
304 for (
auto &Edge : *Edges)
305 ContextIds.
insert(Edge->getContextIds().begin(),
306 Edge->getContextIds().end());
312 uint8_t computeAllocType()
const {
313 auto *Edges = getEdgesWithAllocInfo();
315 return (uint8_t)AllocationType::None;
317 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
318 uint8_t
AllocType = (uint8_t)AllocationType::None;
319 for (
auto &Edge : *Edges) {
330 bool emptyContextIds()
const {
331 auto *Edges = getEdgesWithAllocInfo();
334 for (
auto &Edge : *Edges) {
335 if (!Edge->getContextIds().empty())
342 std::vector<ContextNode *> Clones;
345 ContextNode *CloneOf =
nullptr;
347 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
349 ContextNode(
bool IsAllocation,
CallInfo C)
350 : IsAllocation(IsAllocation),
Call(
C) {}
352 void addClone(ContextNode *Clone) {
354 CloneOf->Clones.push_back(Clone);
355 Clone->CloneOf = CloneOf;
357 Clones.push_back(Clone);
359 Clone->CloneOf =
this;
363 ContextNode *getOrigNode() {
370 unsigned int ContextId);
372 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
373 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
374 void eraseCalleeEdge(
const ContextEdge *Edge);
375 void eraseCallerEdge(
const ContextEdge *Edge);
379 bool hasCall()
const {
return (
bool)
Call.call(); }
385 bool isRemoved()
const {
386 assert((AllocTypes == (uint8_t)AllocationType::None) ==
388 return AllocTypes == (uint8_t)AllocationType::None;
408 uint8_t AllocTypes = 0;
413 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t
AllocType,
416 ContextIds(
std::
move(ContextIds)) {}
431 void removeNoneTypeCalleeEdges(ContextNode *
Node);
433 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
439 template <
class NodeT,
class IteratorT>
440 std::vector<uint64_t>
445 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
448 template <
class NodeT,
class IteratorT>
449 void addStackNodesForMIB(ContextNode *AllocNode,
457 void updateStackNodes();
461 void handleCallsitesWithMultipleTargets();
471 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
474 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
476 using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
485 void assignStackNodesPostOrder(
498 void propagateDuplicateContextIds(
504 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
512 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
522 calleesMatch(CallTy Call, EdgeIter &EI,
528 bool calleeMatchesFunc(
529 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
530 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
531 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
532 Call, Func, CallerFunc, FoundCalleeChain);
537 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
538 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
543 uint64_t getLastStackId(CallTy Call) {
544 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
549 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
550 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
555 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
556 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
562 FuncInfo cloneFunctionForCallsite(
563 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
564 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
565 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
566 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
571 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
572 unsigned CloneNo)
const {
573 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
577 ContextNode *getNodeForInst(
const CallInfo &
C);
578 ContextNode *getNodeForAlloc(
const CallInfo &
C);
579 ContextNode *getNodeForStackId(
uint64_t StackId);
602 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
603 EdgeIter *CallerEdgeI =
nullptr,
611 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
612 ContextNode *NewCallee,
613 EdgeIter *CallerEdgeI =
nullptr,
614 bool NewClone =
false,
642 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
648 unsigned int LastContextId = 0;
651template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
653 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
654template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
656 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
657template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
659 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
660template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
662 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
665class ModuleCallsiteContextGraph
666 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
669 ModuleCallsiteContextGraph(
674 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
678 bool calleeMatchesFunc(
680 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
681 bool findProfiledCalleeThroughTailCalls(
683 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
684 bool &FoundMultipleCalleeChains);
686 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
688 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
689 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
691 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
692 std::map<CallInfo, CallInfo> &CallMap,
693 std::vector<CallInfo> &CallsWithMetadataInFunc,
696 unsigned CloneNo)
const;
705struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
707 IndexCall(std::nullptr_t) : IndexCall() {}
712 IndexCall *operator->() {
return this; }
717 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
720 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
728class IndexCallsiteContextGraph
729 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
732 IndexCallsiteContextGraph(
737 ~IndexCallsiteContextGraph() {
742 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
744 for (
auto &Callsite :
I.second)
745 FS->addCallsite(*Callsite.second);
750 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
754 bool calleeMatchesFunc(
757 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
758 bool findProfiledCalleeThroughTailCalls(
760 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
761 bool &FoundMultipleCalleeChains);
762 uint64_t getLastStackId(IndexCall &Call);
763 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
765 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
768 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
769 std::map<CallInfo, CallInfo> &CallMap,
770 std::vector<CallInfo> &CallsWithMetadataInFunc,
772 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
773 unsigned CloneNo)
const;
777 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
788 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
789 FunctionCalleesToSynthesizedCallsiteInfos;
801 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
804 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
809struct FieldSeparator {
813 FieldSeparator(
const char *Sep =
", ") : Sep(Sep) {}
830 assert(AllocTypes != (uint8_t)AllocationType::None);
832 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
833 return AllocationType::NotCold;
841template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
843 const std::vector<uint8_t> &InAllocTypes,
844 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
847 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
849 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
853 if (l == (uint8_t)AllocationType::None ||
854 r->AllocTypes == (uint8_t)AllocationType::None)
856 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
862template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
863typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
864CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
866 ContextNode *
Node = getNodeForAlloc(
C);
870 return NonAllocationCallToContextNodeMap.lookup(
C);
873template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
874typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
875CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
877 return AllocationCallToContextNodeMap.lookup(
C);
880template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
881typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
882CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
884 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
885 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
886 return StackEntryNode->second;
890template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
891void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
893 unsigned int ContextId) {
894 for (
auto &Edge : CallerEdges) {
895 if (Edge->Caller == Caller) {
897 Edge->getContextIds().insert(ContextId);
901 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
903 CallerEdges.push_back(Edge);
904 Caller->CalleeEdges.push_back(Edge);
907template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
908void CallsiteContextGraph<
909 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
910 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
912 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
913 assert(Edge->ContextIds.empty());
914 Edge->Callee->eraseCallerEdge(Edge.get());
915 EI =
Node->CalleeEdges.erase(EI);
921template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
922typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
923CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
924 findEdgeFromCallee(
const ContextNode *Callee) {
925 for (
const auto &Edge : CalleeEdges)
926 if (Edge->Callee == Callee)
931template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
932typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
933CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
934 findEdgeFromCaller(
const ContextNode *Caller) {
935 for (
const auto &Edge : CallerEdges)
936 if (Edge->Caller == Caller)
941template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
942void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
943 eraseCalleeEdge(
const ContextEdge *Edge) {
945 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
946 return CalleeEdge.get() == Edge;
948 assert(EI != CalleeEdges.end());
949 CalleeEdges.erase(EI);
952template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
954 eraseCallerEdge(
const ContextEdge *Edge) {
956 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
957 return CallerEdge.get() == Edge;
959 assert(EI != CallerEdges.end());
960 CallerEdges.erase(EI);
963template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
964uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
967 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
968 uint8_t
AllocType = (uint8_t)AllocationType::None;
969 for (
auto Id : ContextIds) {
970 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
978template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
980CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
983 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
984 uint8_t
AllocType = (uint8_t)AllocationType::None;
985 for (
auto Id : Node1Ids) {
986 if (!Node2Ids.
count(Id))
988 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
996template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
997uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
999 if (Node1Ids.
size() < Node2Ids.
size())
1000 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1002 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1005template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1006typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1007CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1009 assert(!getNodeForAlloc(Call));
1010 NodeOwner.push_back(
1011 std::make_unique<ContextNode>(
true, Call));
1012 ContextNode *AllocNode = NodeOwner.back().get();
1013 AllocationCallToContextNodeMap[
Call] = AllocNode;
1014 NodeToCallingFunc[AllocNode] =
F;
1016 AllocNode->OrigStackOrAllocId = LastContextId;
1019 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1028 if (AllocTypes & (uint8_t)AllocationType::NotCold)
1030 if (AllocTypes & (uint8_t)AllocationType::Cold)
1035template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1036template <
class NodeT,
class IteratorT>
1037void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1047 ContextIdToAllocationType[++LastContextId] =
AllocType;
1051 ContextIdToTotalSize[LastContextId] = TotalSize;
1055 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1060 ContextNode *PrevNode = AllocNode;
1067 ContextIter != StackContext.
end(); ++ContextIter) {
1068 auto StackId = getStackId(*ContextIter);
1069 ContextNode *
StackNode = getNodeForStackId(StackId);
1071 NodeOwner.push_back(
1072 std::make_unique<ContextNode>(
false));
1074 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1075 StackNode->OrigStackOrAllocId = StackId;
1086template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1088CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1092 for (
auto OldId : StackSequenceContextIds) {
1093 NewContextIds.
insert(++LastContextId);
1094 OldToNewContextIds[OldId].insert(LastContextId);
1095 assert(ContextIdToAllocationType.count(OldId));
1097 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1101 ContextIdToTotalSize[LastContextId] = 0;
1103 return NewContextIds;
1106template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1107void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1108 propagateDuplicateContextIds(
1113 for (
auto Id : ContextIds)
1114 if (
auto NewId = OldToNewContextIds.find(Id);
1115 NewId != OldToNewContextIds.end())
1116 NewIds.
insert(NewId->second.begin(), NewId->second.end());
1121 auto UpdateCallers = [&](ContextNode *
Node,
1123 auto &&UpdateCallers) ->
void {
1124 for (
const auto &Edge :
Node->CallerEdges) {
1125 auto Inserted = Visited.insert(Edge.get());
1128 ContextNode *NextNode = Edge->Caller;
1132 if (!NewIdsToAdd.
empty()) {
1133 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1134 UpdateCallers(NextNode, Visited, UpdateCallers);
1140 for (
auto &Entry : AllocationCallToContextNodeMap) {
1142 UpdateCallers(
Node, Visited, UpdateCallers);
1146template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1147void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1148 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1153 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1155 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1161 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1162 NotFoundContextIds);
1163 RemainingContextIds.
swap(NotFoundContextIds);
1165 if (NewEdgeContextIds.
empty()) {
1169 if (TowardsCallee) {
1170 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1171 auto NewEdge = std::make_shared<ContextEdge>(
1172 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1173 NewNode->CalleeEdges.push_back(NewEdge);
1174 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1176 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1177 auto NewEdge = std::make_shared<ContextEdge>(
1178 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1179 NewNode->CallerEdges.push_back(NewEdge);
1180 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1183 if (Edge->getContextIds().empty()) {
1184 if (TowardsCallee) {
1185 Edge->Callee->eraseCallerEdge(Edge.get());
1186 EI = OrigNode->CalleeEdges.erase(EI);
1188 Edge->Caller->eraseCalleeEdge(Edge.get());
1189 EI = OrigNode->CallerEdges.erase(EI);
1197template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1199 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1202 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1203 assert(!Edge->ContextIds.empty());
1206template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1208 bool CheckEdges =
true) {
1209 if (
Node->isRemoved())
1213 auto NodeContextIds =
Node->getContextIds();
1217 if (
Node->CallerEdges.size()) {
1219 Node->CallerEdges.front()->ContextIds);
1222 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1223 set_union(CallerEdgeContextIds, Edge->ContextIds);
1227 assert(NodeContextIds == CallerEdgeContextIds ||
1230 if (
Node->CalleeEdges.size()) {
1232 Node->CalleeEdges.front()->ContextIds);
1235 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1236 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1238 assert(NodeContextIds == CalleeEdgeContextIds);
1242template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1243void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1244 assignStackNodesPostOrder(
1247 &StackIdToMatchingCalls,
1256 auto CallerEdges =
Node->CallerEdges;
1257 for (
auto &Edge : CallerEdges) {
1261 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1262 CallToMatchingCall);
1271 if (
Node->IsAllocation ||
1272 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1275 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1279 if (Calls.size() == 1) {
1280 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1281 if (Ids.size() == 1) {
1282 assert(SavedContextIds.empty());
1285 if (
Node->Recursive)
1287 Node->setCall(Call);
1288 NonAllocationCallToContextNodeMap[
Call] =
Node;
1297 ContextNode *LastNode = getNodeForStackId(LastId);
1301 for (
unsigned I = 0;
I < Calls.size();
I++) {
1302 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1305 if (SavedContextIds.empty()) {
1310 if (!CallToMatchingCall.
contains(Call))
1312 auto MatchingCall = CallToMatchingCall[
Call];
1313 assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
1314 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1319 assert(LastId == Ids.back());
1321 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1331 ContextNode *PrevNode =
nullptr;
1332 for (
auto Id : Ids) {
1333 ContextNode *CurNode = getNodeForStackId(Id);
1337 assert(!CurNode->Recursive);
1342 auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
1344 SavedContextIds.clear();
1351 if (SavedContextIds.empty())
1354 if (SavedContextIds.empty())
1358 NodeOwner.push_back(
1359 std::make_unique<ContextNode>(
false, Call));
1360 ContextNode *NewNode = NodeOwner.back().get();
1361 NodeToCallingFunc[NewNode] =
Func;
1362 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1363 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1368 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1373 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1378 for (
auto Id : Ids) {
1379 ContextNode *CurNode = getNodeForStackId(Id);
1386 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1388 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1389 if (PrevEdge->getContextIds().empty()) {
1390 PrevNode->eraseCallerEdge(PrevEdge);
1391 CurNode->eraseCalleeEdge(PrevEdge);
1397 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1398 ? (uint8_t)AllocationType::None
1399 : CurNode->computeAllocType();
1403 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1404 for (
auto Id : Ids) {
1405 ContextNode *CurNode = getNodeForStackId(Id);
1408 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
1414template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1415void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1424 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1425 for (
auto &Call : CallsWithMetadata) {
1427 if (AllocationCallToContextNodeMap.count(Call))
1429 auto StackIdsWithContextNodes =
1430 getStackIdsWithContextNodesForCall(
Call.call());
1433 if (StackIdsWithContextNodes.empty())
1437 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1438 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1453 for (
auto &It : StackIdToMatchingCalls) {
1454 auto &Calls = It.getSecond();
1456 if (Calls.size() == 1) {
1457 auto &Ids = std::get<1>(Calls[0]);
1458 if (Ids.size() == 1)
1467 std::stable_sort(Calls.begin(), Calls.end(),
1468 [](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1469 auto &IdsA = std::get<1>(A);
1470 auto &IdsB = std::get<1>(B);
1471 return IdsA.size() > IdsB.size() ||
1472 (IdsA.size() == IdsB.size() && IdsA < IdsB);
1479 ContextNode *LastNode = getNodeForStackId(LastId);
1483 if (LastNode->Recursive)
1498 for (
unsigned I = 0;
I < Calls.size();
I++) {
1499 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1500 assert(SavedContextIds.empty());
1501 assert(LastId == Ids.back());
1509 ContextNode *PrevNode = LastNode;
1510 ContextNode *CurNode = LastNode;
1515 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1517 CurNode = getNodeForStackId(Id);
1521 if (CurNode->Recursive) {
1526 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1544 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1547 if (StackSequenceContextIds.
empty()) {
1560 if (Ids.back() != getLastStackId(Call)) {
1561 for (
const auto &PE : LastNode->CallerEdges) {
1562 set_subtract(StackSequenceContextIds, PE->getContextIds());
1563 if (StackSequenceContextIds.
empty())
1567 if (StackSequenceContextIds.
empty())
1571 const FuncTy *CallFunc = CallToFunc[
Call];
1576 if (FuncToCallMap.
contains(CallFunc)) {
1579 CallToMatchingCall[
Call] = FuncToCallMap[CallFunc];
1585 bool DuplicateContextIds =
false;
1586 if (
I + 1 < Calls.size()) {
1587 auto NextIds = std::get<1>(Calls[
I + 1]);
1588 DuplicateContextIds = Ids == NextIds;
1597 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1598 StackSequenceContextIds.
size());
1601 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1602 : StackSequenceContextIds;
1603 assert(!SavedContextIds.empty());
1605 if (!DuplicateContextIds) {
1609 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1610 if (LastNodeContextIds.
empty())
1614 FuncToCallMap.
clear();
1619 FuncToCallMap[CallFunc] =
Call;
1624 propagateDuplicateContextIds(OldToNewContextIds);
1635 for (
auto &Entry : AllocationCallToContextNodeMap)
1636 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
1637 CallToMatchingCall);
1644 Call->getMetadata(LLVMContext::MD_callsite));
1645 return CallsiteContext.
back();
1648uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1651 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1653 return Index.getStackIdAtIndex(CallsiteContext.
back());
1666std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1668 unsigned CloneNo)
const {
1669 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1670 cast<CallBase>(Call)->getCalledFunction()->getName())
1674std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1675 const IndexCall &Call,
1676 unsigned CloneNo)
const {
1677 auto VI = FSToVIMap.find(Func);
1678 assert(VI != FSToVIMap.end());
1679 if (isa<AllocInfo *>(
Call.getBase()))
1680 return (
VI->second.name() +
" -> alloc").str();
1682 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(
Call.getBase());
1683 return (
VI->second.name() +
" -> " +
1685 Callsite->Clones[CloneNo]))
1690std::vector<uint64_t>
1691ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1694 Call->getMetadata(LLVMContext::MD_callsite));
1695 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1699std::vector<uint64_t>
1700IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1703 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1709template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1710template <
class NodeT,
class IteratorT>
1711std::vector<uint64_t>
1712CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1714 std::vector<uint64_t> StackIds;
1715 for (
auto IdOrIndex : CallsiteContext) {
1716 auto StackId = getStackId(IdOrIndex);
1717 ContextNode *
Node = getNodeForStackId(StackId);
1720 StackIds.push_back(StackId);
1725ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1728 :
Mod(
M), OREGetter(OREGetter) {
1730 std::vector<CallInfo> CallsWithMetadata;
1731 for (
auto &BB :
F) {
1732 for (
auto &
I : BB) {
1733 if (!isa<CallBase>(
I))
1735 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1736 CallsWithMetadata.push_back(&
I);
1737 CallToFunc[&
I] = &
F;
1738 auto *AllocNode = addAllocNode(&
I, &
F);
1739 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
1743 for (
auto &MDOp : MemProfMD->operands()) {
1744 auto *MIBMD = cast<const MDNode>(MDOp);
1748 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1749 AllocNode, StackContext, CallsiteContext,
1752 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1755 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
1756 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
1759 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
1760 CallsWithMetadata.push_back(&
I);
1761 CallToFunc[&
I] = &
F;
1765 if (!CallsWithMetadata.empty())
1766 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
1770 dbgs() <<
"CCG before updating call stack chains:\n";
1775 exportToDot(
"prestackupdate");
1779 handleCallsitesWithMultipleTargets();
1782 for (
auto &FuncEntry : FuncToCallsWithMetadata)
1783 for (
auto &Call : FuncEntry.second)
1784 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
1787IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1794 for (
auto &S :
VI.getSummaryList()) {
1803 !isPrevailing(
VI.getGUID(), S.get()))
1805 auto *
FS = dyn_cast<FunctionSummary>(S.get());
1808 std::vector<CallInfo> CallsWithMetadata;
1809 if (!
FS->allocs().empty()) {
1810 for (
auto &AN :
FS->mutableAllocs()) {
1815 if (AN.MIBs.empty())
1817 IndexCall AllocCall(&AN);
1818 CallsWithMetadata.push_back(AllocCall);
1819 CallToFunc[AllocCall] =
FS;
1820 auto *AllocNode = addAllocNode(AllocCall, FS);
1828 AN.TotalSizes.size() == AN.MIBs.size());
1830 for (
auto &MIB : AN.MIBs) {
1835 TotalSize = AN.TotalSizes[
I];
1836 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1837 AllocNode, StackContext, EmptyContext, MIB.AllocType,
1841 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1846 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
1850 if (!
FS->callsites().empty())
1851 for (
auto &SN :
FS->mutableCallsites()) {
1852 IndexCall StackNodeCall(&SN);
1853 CallsWithMetadata.push_back(StackNodeCall);
1854 CallToFunc[StackNodeCall] =
FS;
1857 if (!CallsWithMetadata.empty())
1858 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
1860 if (!
FS->allocs().empty() || !
FS->callsites().empty())
1866 dbgs() <<
"CCG before updating call stack chains:\n";
1871 exportToDot(
"prestackupdate");
1875 handleCallsitesWithMultipleTargets();
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// Walks every non-allocation callsite node and checks each callee edge
// against the IR/summary callee via calleesMatch(); edges whose profiled
// callee does not match are counted (RemovedEdgesWithMismatchedCallees).
// Afterwards, nodes left with no call are dropped from the map and nodes
// synthesized for tail-call chains are registered.
1878template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1879void CallsiteContextGraph<DerivedCCG, FuncTy,
1880 CallTy>::handleCallsitesWithMultipleTargets() {
1895 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
1900 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
1903 if (!Edge->Callee->hasCall())
1905 assert(NodeToCallingFunc.count(Edge->Callee));
1907 if (calleesMatch(Call, EI, TailCallToContextNodeMap))
1909 RemovedEdgesWithMismatchedCallees++;
// Prune entries whose node no longer has an associated call.
1922 NonAllocationCallToContextNodeMap.remove_if(
1923 [](
const auto &it) {
return !it.second->hasCall(); });
// Register nodes created for intermediate tail-call frames.
1927 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
1928 NonAllocationCallToContextNodeMap[
Call] =
Node;
// NOTE(review): orphaned fragment — the enclosing function's signature was
// lost in extraction; presumably IndexCallsiteContextGraph::getStackId,
// mapping a stack-id index to the actual stack id via the summary index.
1939 return Index.getStackIdAtIndex(IdOrIndex);
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// Checks whether the profiled callee of edge *EI matches the actual callee of
// Call (possibly through a chain of tail calls discovered by
// calleeMatchesFunc). When a tail-call chain is found, splices new context
// nodes/edges for each intermediate frame and rewires the original edge.
1942template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1943bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
1944 CallTy Call, EdgeIter &EI,
1947 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
1948 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
1951 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
1952 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
// Direct match (empty chain): nothing to splice in.
1957 if (FoundCalleeChain.empty())
// Helper: connect Caller->Callee, merging into an existing edge when one
// exists; otherwise create a new shared edge carrying Edge's context ids.
1960 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
1961 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
1965 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
1966 Edge->ContextIds.end());
1967 CurEdge->AllocTypes |= Edge->AllocTypes;
1972 auto NewEdge = std::make_shared<ContextEdge>(
1973 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
1974 Callee->CallerEdges.push_back(NewEdge);
1975 if (Caller == Edge->Caller) {
// Insert before EI so the caller's iterator stays valid/positioned.
1979 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
1982 "Iterator position not restored after insert and increment");
1984 Caller->CalleeEdges.push_back(NewEdge);
// Walk the discovered tail-call chain bottom-up, creating (or reusing) a
// context node per intermediate call and chaining edges toward the callee.
1989 auto *CurCalleeNode = Edge->Callee;
1990 for (
auto &[NewCall, Func] : FoundCalleeChain) {
1991 ContextNode *NewNode =
nullptr;
1993 if (TailCallToContextNodeMap.
count(NewCall)) {
1994 NewNode = TailCallToContextNodeMap[NewCall];
1995 NewNode->AllocTypes |= Edge->AllocTypes;
1997 FuncToCallsWithMetadata[
Func].push_back({NewCall});
1999 NodeOwner.push_back(
2000 std::make_unique<ContextNode>(
false, NewCall));
2001 NewNode = NodeOwner.
back().get();
2002 NodeToCallingFunc[NewNode] =
Func;
2003 TailCallToContextNodeMap[NewCall] = NewNode;
2004 NewNode->AllocTypes = Edge->AllocTypes;
2008 AddEdge(NewNode, CurCalleeNode);
2010 CurCalleeNode = NewNode;
// Finally connect the original caller to the top of the new chain and
// remove the now-replaced direct edge from both endpoints.
2014 AddEdge(Edge->Caller, CurCalleeNode);
2017 Edge->Callee->eraseCallerEdge(Edge.get());
2018 EI = Edge->Caller->CalleeEdges.erase(EI);
2024 assert(!Edge->Caller->CalleeEdges.empty());
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// IR (module) flavor: recursively searches the body of CurCallee for tail
// calls that (transitively) reach ProfiledCallee. Returns true iff exactly
// one such chain exists; sets FoundMultipleCalleeChains when ambiguous.
// Successful frames are appended to FoundCalleeChain.
2030bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2032 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2033 bool &FoundMultipleCalleeChains) {
2040 FoundCalleeChain.push_back({Callsite,
F});
// Resolve CurCallee to a Function, looking through one level of alias.
2043 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
2045 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2047 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2055 bool FoundSingleCalleeChain =
false;
// Scan every instruction for tail calls only.
2056 for (
auto &BB : *CalleeFunc) {
2057 for (
auto &
I : BB) {
2058 auto *CB = dyn_cast<CallBase>(&
I);
2059 if (!CB || !CB->isTailCall())
2061 auto *CalledValue = CB->getCalledOperand();
2062 auto *CalledFunction = CB->getCalledFunction();
2063 if (CalledValue && !CalledFunction) {
2064 CalledValue = CalledValue->stripPointerCasts();
2066 CalledFunction = dyn_cast<Function>(CalledValue);
2070 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2071 assert(!CalledFunction &&
2072 "Expected null called function in callsite for alias");
2073 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2075 if (!CalledFunction)
2077 if (CalledFunction == ProfiledCallee) {
// A second direct hit means the chain is ambiguous — give up.
2078 if (FoundSingleCalleeChain) {
2079 FoundMultipleCalleeChains =
true;
2082 FoundSingleCalleeChain =
true;
// Bookkeeping stats: count, aggregate and max depth of found chains.
2083 FoundProfiledCalleeCount++;
2084 FoundProfiledCalleeDepth +=
Depth;
2085 if (
Depth > FoundProfiledCalleeMaxDepth)
2086 FoundProfiledCalleeMaxDepth =
Depth;
2087 SaveCallsiteInfo(&
I, CalleeFunc);
2088 }
// Otherwise recurse one frame deeper through this tail call.
else if (findProfiledCalleeThroughTailCalls(
2089 ProfiledCallee, CalledFunction,
Depth + 1,
2090 FoundCalleeChain, FoundMultipleCalleeChains)) {
2093 assert(!FoundMultipleCalleeChains);
2094 if (FoundSingleCalleeChain) {
2095 FoundMultipleCalleeChains =
true;
2098 FoundSingleCalleeChain =
true;
2099 SaveCallsiteInfo(&
I, CalleeFunc);
2100 }
else if (FoundMultipleCalleeChains)
2105 return FoundSingleCalleeChain;
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// IR flavor: does Call actually invoke Func? Accepts a direct match, a match
// through a GlobalAlias, or a unique tail-call chain (via
// findProfiledCalleeThroughTailCalls); otherwise logs the mismatch.
2108bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2110 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2111 auto *CB = dyn_cast<CallBase>(Call);
2112 if (!CB->getCalledOperand())
2114 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2115 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2116 if (CalleeFunc == Func)
// Alias to the profiled function also counts as a direct match.
2118 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2119 if (Alias && Alias->getAliasee() == Func)
2130 bool FoundMultipleCalleeChains =
false;
2131 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2133 FoundMultipleCalleeChains)) {
2134 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2135 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2136 <<
" that actually called " << CalleeVal->getName()
2137 << (FoundMultipleCalleeChains
2138 ?
" (found multiple possible chains)"
// Multiple chains are counted separately for statistics.
2141 if (FoundMultipleCalleeChains)
2142 FoundProfiledCalleeNonUniquelyCount++;
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// Summary-index flavor of the tail-call search: walks FunctionSummary call
// edges flagged hasTailCall() instead of IR instructions, synthesizing
// CallsiteInfo records (cached in FunctionCalleesToSynthesizedCallsiteInfos)
// for the intermediate frames it discovers.
2149bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2151 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2152 bool &FoundMultipleCalleeChains) {
// Reuse a previously synthesized CallsiteInfo for this (FS, Callee) pair
// if one exists; otherwise create and cache it.
2161 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2162 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2165 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2168 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2169 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2176 bool FoundSingleCalleeChain =
false;
// Only prevailing summaries are considered.
2179 !isPrevailing(CurCallee.
getGUID(), S.get()))
2181 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
// For aliases, record the aliasee's ValueInfo for the FS->VI map.
2184 auto FSVI = CurCallee;
2185 auto *AS = dyn_cast<AliasSummary>(S.get());
2187 FSVI = AS->getAliaseeVI();
2188 for (
auto &CallEdge :
FS->calls()) {
2189 if (!CallEdge.second.hasTailCall())
2191 if (CallEdge.first == ProfiledCallee) {
// Second hit == ambiguous chain.
2192 if (FoundSingleCalleeChain) {
2193 FoundMultipleCalleeChains =
true;
2196 FoundSingleCalleeChain =
true;
2197 FoundProfiledCalleeCount++;
2198 FoundProfiledCalleeDepth +=
Depth;
2199 if (
Depth > FoundProfiledCalleeMaxDepth)
2200 FoundProfiledCalleeMaxDepth =
Depth;
2201 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
// FSToVIMap must stay consistent for this summary.
2203 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2204 FSToVIMap[
FS] = FSVI;
2205 }
else if (findProfiledCalleeThroughTailCalls(
2206 ProfiledCallee, CallEdge.first,
Depth + 1,
2207 FoundCalleeChain, FoundMultipleCalleeChains)) {
2210 assert(!FoundMultipleCalleeChains);
2211 if (FoundSingleCalleeChain) {
2212 FoundMultipleCalleeChains =
true;
2215 FoundSingleCalleeChain =
true;
2216 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2218 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2219 FSToVIMap[
FS] = FSVI;
2220 }
else if (FoundMultipleCalleeChains)
2225 return FoundSingleCalleeChain;
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// Summary-index flavor: compares the callsite record's Callee ValueInfo with
// Func's ValueInfo (via FSToVIMap), accepting alias matches, then falls back
// to the unique-tail-call-chain search; logs and counts failures.
2228bool IndexCallsiteContextGraph::calleeMatchesFunc(
2231 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2233 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
// Only the first summary entry is inspected for an alias here.
2237 Callee.getSummaryList().empty()
2239 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2240 assert(FSToVIMap.count(Func));
2241 auto FuncVI = FSToVIMap[
Func];
2242 if (Callee == FuncVI ||
2257 bool FoundMultipleCalleeChains =
false;
2258 if (!findProfiledCalleeThroughTailCalls(
2259 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2260 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2261 <<
" from " << FSToVIMap[CallerFunc]
2262 <<
" that actually called " << Callee
2263 << (FoundMultipleCalleeChains
2264 ?
" (found multiple possible chains)"
2267 if (FoundMultipleCalleeChains)
2268 FoundProfiledCalleeNonUniquelyCount++;
// NOTE(review): body lost to extraction — only the signature survives.
// Debug dump of a single ContextNode (presumably forwards to print()).
2275template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2276void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// Pretty-prints one ContextNode: address, recursion flag, matching calls,
// sorted context ids, callee/caller edges, and clone relationships.
2282template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2283void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2285 OS <<
"Node " <<
this <<
"\n";
2289 OS <<
" (recursive)";
2291 if (!MatchingCalls.empty()) {
2292 OS <<
"\tMatchingCalls:\n";
2293 for (
auto &MatchingCall : MatchingCalls) {
2295 MatchingCall.print(
OS);
2300 OS <<
"\tContextIds:";
// Ids are sorted for deterministic output.
2302 auto ContextIds = getContextIds();
2303 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2304 std::sort(SortedIds.begin(), SortedIds.end());
2305 for (
auto Id : SortedIds)
2308 OS <<
"\tCalleeEdges:\n";
2309 for (
auto &Edge : CalleeEdges)
2310 OS <<
"\t\t" << *Edge <<
"\n";
2311 OS <<
"\tCallerEdges:\n";
2312 for (
auto &Edge : CallerEdges)
2313 OS <<
"\t\t" << *Edge <<
"\n";
2314 if (!Clones.empty()) {
2317 for (
auto *Clone : Clones)
2320 }
else if (CloneOf) {
2321 OS <<
"\tClone of " << CloneOf <<
"\n";
// NOTE(review): body lost to extraction — only the signature survives.
// Debug dump of a single ContextEdge (presumably forwards to print()).
2325template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2326void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// NOTE(review): extraction-garbled excerpt; lines missing.
// Pretty-prints one ContextEdge, listing its context ids in sorted order.
2332template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2333void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2337 OS <<
" ContextIds:";
2338 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2339 std::sort(SortedIds.begin(), SortedIds.end());
2340 for (
auto Id : SortedIds)
// NOTE(review): body lost to extraction — only the signature survives.
// Debug dump of the whole graph (presumably forwards to print()).
2344template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2345void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// NOTE(review): extraction-garbled excerpt; lines missing.
// Prints every live (non-removed) node of the graph via GraphTraits node
// iteration.
2349template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2350void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2352 OS <<
"Callsite Context Graph:\n";
2353 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2354 for (
const auto Node : nodes<GraphType>(
this)) {
// Removed nodes are skipped rather than printed.
2355 if (
Node->isRemoved())
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// For each live allocation node, prints per-context total sizes and
// allocation types, looked up from ContextIdToTotalSize /
// ContextIdToAllocationType, in sorted context-id order.
2362template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2363void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2365 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2366 for (
const auto Node : nodes<GraphType>(
this)) {
2367 if (
Node->isRemoved())
// Only allocation nodes carry size information.
2369 if (!
Node->IsAllocation)
2372 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2373 std::sort(SortedIds.begin(), SortedIds.end());
2374 for (
auto Id : SortedIds) {
2375 auto SizeI = ContextIdToTotalSize.find(Id);
2376 assert(SizeI != ContextIdToTotalSize.end());
2377 auto TypeI = ContextIdToAllocationType.find(Id);
2378 assert(TypeI != ContextIdToAllocationType.end());
2380 <<
" with total size " << SizeI->second <<
" is "
// NOTE(review): extraction-garbled excerpt; lines missing.
// Verification pass: runs checkNode on every node and checkEdge on each of
// its caller edges.
2386template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2387void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2388 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2389 for (
const auto Node : nodes<GraphType>(
this)) {
2390 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2391 for (
auto &Edge :
Node->CallerEdges)
2392 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
// NOTE(review): heavily-truncated fragment of the GraphTraits specialization
// enabling llvm graph iteration (nodes<>, dot export) over the context graph.
// Most members were lost in extraction.
2396template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2398 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2399 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2401 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
// Entry node: first node owned by the graph.
2417 return G->NodeOwner.begin()->get();
2420 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2421 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2429 decltype(&GetCallee)>;
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// DOT rendering traits: builds node labels (orig stack/alloc id, call label,
// recursive/external markers), tooltips listing context ids, fill colors by
// allocation type, and hex-pointer node ids; removed nodes are hidden.
2440template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2445 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2451 std::string LabelString =
2452 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2453 Twine(Node->OrigStackOrAllocId))
2455 LabelString +=
"\n";
2456 if (Node->hasCall()) {
2457 auto Func =
G->NodeToCallingFunc.find(Node);
2458 assert(Func !=
G->NodeToCallingFunc.end());
2460 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2462 LabelString +=
"null call";
2463 if (Node->Recursive)
2464 LabelString +=
" (recursive)";
2466 LabelString +=
" (external)";
// Node attributes: tooltip = context ids, fillcolor keyed by alloc type.
2473 getContextIds(Node->getContextIds()) +
"\"")
2476 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2478 if (Node->CloneOf) {
// Edge attributes mirror the node scheme.
2488 auto &Edge = *(ChildIter.getCurrent());
2489 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2490 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
2497 return Node->isRemoved();
// Tooltip helper: list ids when few (<100), otherwise just the count.
2502 std::string IdString =
"ContextIds:";
2503 if (ContextIds.
size() < 100) {
2504 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2505 std::sort(SortedIds.begin(), SortedIds.end());
2506 for (
auto Id : SortedIds)
2507 IdString += (
" " +
Twine(Id)).str();
2509 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
2514 static std::string getColor(uint8_t AllocTypes) {
2523 return "mediumorchid1";
2527 static std::string getNodeId(NodeRef
Node) {
2528 std::stringstream SStream;
2529 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
2530 std::string
Result = SStream.str();
// NOTE(review): body lost to extraction — only the signature survives.
// Writes the graph to a dot file tagged with Label (used with the
// memprof-dot-file-path-prefix option declared in the file header).
2535template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2536void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
2537 std::string Label)
const {
// NOTE(review): extraction-garbled excerpt; lines missing.
//
// Creates a brand-new clone of Edge's callee node (same IsAllocation flag,
// call, matching calls, and calling function), registers it via addClone,
// then delegates the actual edge rewiring to moveEdgeToExistingCalleeClone.
2542template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2543typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
2544CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
2545 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
2547 ContextNode *
Node = Edge->Callee;
2548 NodeOwner.push_back(
2549 std::make_unique<ContextNode>(
Node->IsAllocation,
Node->Call));
2550 ContextNode *Clone = NodeOwner.back().get();
2551 Node->addClone(Clone);
2552 Clone->MatchingCalls =
Node->MatchingCalls;
2554 NodeToCallingFunc[Clone] = NodeToCallingFunc[
Node];
2555 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true,
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// Moves (all or a subset of) Edge's context ids from its current callee to
// NewCallee — a clone of the same original node. Handles two cases: moving
// the whole edge (re-point it) vs. splitting off only ContextIdsToMove into
// an existing or new caller edge on NewCallee. Afterwards, callee edges of
// the old callee are split proportionally and recomputed alloc types are
// propagated; optional node checks run at the end.
2560template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2561void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2562 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
2563 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
// NewCallee must be a clone of the edge's current callee.
2568 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
2570 ContextNode *OldCallee = Edge->Callee;
2574 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
// Empty set means "move everything on this edge".
2578 if (ContextIdsToMove.
empty())
2579 ContextIdsToMove = Edge->getContextIds();
// Case 1: moving the entire edge.
2583 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
2586 *CallerEdgeI = OldCallee->CallerEdges.
erase(*CallerEdgeI);
2588 OldCallee->eraseCallerEdge(Edge.get());
2589 if (ExistingEdgeToNewCallee) {
// Merge into the pre-existing edge and retire the old one.
2592 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2593 ContextIdsToMove.
end());
2594 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
2595 assert(Edge->ContextIds == ContextIdsToMove);
2596 Edge->ContextIds.clear();
2598 Edge->Caller->eraseCalleeEdge(Edge.get());
2601 Edge->Callee = NewCallee;
2602 NewCallee->CallerEdges.push_back(Edge);
2607 NewCallee->AllocTypes |= Edge->AllocTypes;
// Case 2: moving only a subset of the edge's context ids.
2613 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
2614 if (ExistingEdgeToNewCallee) {
2617 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2618 ContextIdsToMove.
end());
2619 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
2622 auto NewEdge = std::make_shared<ContextEdge>(
2623 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
2624 Edge->Caller->CalleeEdges.push_back(NewEdge);
2625 NewCallee->CallerEdges.push_back(NewEdge);
2629 NewCallee->AllocTypes |= CallerEdgeAllocType;
// Remaining ids stay on the original edge; recompute its type.
2631 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
// Redistribute the moved ids across the old callee's own callee edges.
2636 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
2641 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
2642 OldCalleeEdge->AllocTypes =
2643 computeAllocType(OldCalleeEdge->getContextIds());
2650 if (
auto *NewCalleeEdge =
2651 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
2652 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
2653 EdgeContextIdsToMove.
end());
2654 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
2658 auto NewEdge = std::make_shared<ContextEdge>(
2659 OldCalleeEdge->Callee, NewCallee,
2660 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
2661 NewCallee->CalleeEdges.push_back(NewEdge);
2662 NewEdge->Callee->CallerEdges.push_back(NewEdge);
2666 OldCallee->AllocTypes = OldCallee->computeAllocType();
2669 OldCallee->emptyContextIds());
// Optional verification of both endpoints and their callees.
2671 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
2672 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
2673 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
2674 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
2676 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
2677 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
// NOTE(review): extraction-garbled excerpt; lines missing.
//
// Depth-first cleanup: removes None-type callee edges from Node, then
// recurses into its clones and (via the copied CallerEdges snapshot, which
// guards against iterator invalidation) its callers, skipping edges already
// disconnected at both ends.
2682template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2683void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2684 recursivelyRemoveNoneTypeCalleeEdges(
2690 removeNoneTypeCalleeEdges(
Node);
2692 for (
auto *Clone :
Node->Clones)
2693 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate a copy: recursion may mutate Node->CallerEdges.
2697 auto CallerEdges =
Node->CallerEdges;
2698 for (
auto &Edge : CallerEdges) {
2700 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2704 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
// NOTE(review): extraction-garbled excerpt; lines missing.
// Driver: runs the recursive identifyClones from every allocation node, then
// a second pass removing None-type callee edges from the whole reachable set.
2708template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2709void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
2711 for (
auto &Entry : AllocationCallToContextNodeMap) {
2713 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
2716 for (
auto &Entry : AllocationCallToContextNodeMap)
2717 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// NOTE(review): extraction-garbled excerpt; fused line numbers, lines missing.
//
// Core cloning analysis for one node. First recurses through callers (on a
// snapshot of CallerEdges), then orders the caller edges by an allocation-
// type priority table (cold/not-cold combinations) and, per caller edge,
// either keeps it on this node (when alloc types already agree), moves it to
// a compatible existing clone, or creates a new callee clone for it.
2730template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2731void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2735 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// Nodes without a call cannot be cloned usefully.
2743 if (!
Node->hasCall())
// Recurse into callers first (snapshot avoids invalidation).
2758 auto CallerEdges =
Node->CallerEdges;
2759 for (
auto &Edge : CallerEdges) {
2761 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2766 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
2767 identifyClones(Edge->Caller, Visited, AllocContextIds);
// Priority table indexed by AllocTypes bitmask; ties broken by smallest
// context id for deterministic ordering.
2790 const unsigned AllocTypeCloningPriority[] = { 3, 4,
2793 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
2794 [&](
const std::shared_ptr<ContextEdge> &
A,
2795 const std::shared_ptr<ContextEdge> &
B) {
2798 if (A->ContextIds.empty())
2804 if (B->ContextIds.empty())
2807 if (A->AllocTypes == B->AllocTypes)
2810 return *A->ContextIds.begin() < *B->ContextIds.begin();
2811 return AllocTypeCloningPriority[A->AllocTypes] <
2812 AllocTypeCloningPriority[B->AllocTypes];
// Main per-caller-edge cloning loop.
2822 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
2823 auto CallerEdge = *EI;
2832 auto CallerEdgeContextsForAlloc =
2834 if (CallerEdgeContextsForAlloc.empty()) {
2838 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Per-callee-edge alloc types restricted to this caller's contexts.
2842 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
2843 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
2844 for (
auto &CalleeEdge :
Node->CalleeEdges)
2845 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2846 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// If this caller's types already match the node, no clone is needed.
2861 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
2862 allocTypeToUse(
Node->AllocTypes) &&
2863 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2864 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Try to reuse an existing clone with compatible alloc types.
2871 ContextNode *Clone =
nullptr;
2872 for (
auto *CurClone :
Node->Clones) {
2873 if (allocTypeToUse(CurClone->AllocTypes) !=
2874 allocTypeToUse(CallerAllocTypeForAlloc))
2877 if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2878 CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
2886 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI,
false,
2887 CallerEdgeContextsForAlloc);
2890 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
2905 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// NOTE(review): extraction-garbled excerpt; lines missing.
// IR flavor: attaches a "memprof" function attribute (cold / notcold string)
// to the allocation call and emits an optimization remark.
2908void ModuleCallsiteContextGraph::updateAllocationCall(
2912 "memprof", AllocTypeString);
2913 cast<CallBase>(
Call.call())->addFnAttr(
A);
2914 OREGetter(
Call.call()->getFunction())
2916 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
2918 <<
" marked with memprof allocation attribute "
2919 <<
ore::NV(
"Attribute", AllocTypeString));
// NOTE(review): mostly lost to extraction. Summary flavor: records the alloc
// type into the AllocInfo's Versions slot for this clone number (the visible
// assert guards the Versions index).
2922void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
2926 assert(AI->Versions.size() >
Call.cloneNo());
// NOTE(review): extraction-garbled excerpt; lines missing.
// IR flavor: redirects the caller's call instruction to the assigned
// function clone (only when cloneNo > 0) and emits a remark.
2930void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2931 FuncInfo CalleeFunc) {
2932 if (CalleeFunc.cloneNo() > 0)
2933 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
2934 OREGetter(CallerCall.call()->getFunction())
2936 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
2937 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
2938 <<
" assigned to call function clone "
2939 <<
ore::NV(
"Callee", CalleeFunc.func()));
// NOTE(review): extraction-garbled excerpt; lines missing.
// Summary flavor: records the callee's clone number in the CallsiteInfo's
// Clones slot for this caller clone (callers must not be allocations).
2942void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2943 FuncInfo CalleeFunc) {
2944 auto *CI = CallerCall.call().dyn_cast<
CallsiteInfo *>();
2946 "Caller cannot be an allocation which should not have profiled calls");
2947 assert(CI->Clones.size() > CallerCall.cloneNo());
2948 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
// NOTE(review): extraction-garbled excerpt; lines missing (the actual
// CloneFunction call and Name construction are not visible).
// IR flavor: clones the function (VMap populated elsewhere), names the clone,
// maps each metadata-bearing call to its cloned instruction in CallMap at the
// given CloneNo, and emits a remark.
2951CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
2953ModuleCallsiteContextGraph::cloneFunctionForCallsite(
2954 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2955 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2961 NewFunc->setName(
Name);
2962 for (
auto &Inst : CallsWithMetadataInFunc) {
// Only original (clone 0) calls are expected here.
2964 assert(Inst.cloneNo() == 0);
2965 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
2967 OREGetter(
Func.func())
2969 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
2970 return {NewFunc, CloneNo};
// NOTE(review): extraction-garbled excerpt; lines missing (return type line
// is truncated). Summary flavor: no actual IR clone exists at this stage —
// instead each alloc/callsite record grows a new Versions/Clones slot
// (initialized to 0) for CloneNo, and CallMap maps the call to itself at the
// new clone number.
2974 IndexCall>::FuncInfo
2975IndexCallsiteContextGraph::cloneFunctionForCallsite(
2976 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2977 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2992 for (
auto &Inst : CallsWithMetadataInFunc) {
2994 assert(Inst.cloneNo() == 0);
2995 if (
auto *AI = Inst.call().dyn_cast<
AllocInfo *>()) {
// Slots are appended in clone-number order; size must equal CloneNo.
2996 assert(AI->Versions.size() == CloneNo);
2999 AI->Versions.push_back(0);
3002 assert(CI && CI->Clones.size() == CloneNo);
3005 CI->Clones.push_back(0);
3007 CallMap[Inst] = {Inst.call(), CloneNo};
3009 return {
Func.func(), CloneNo};
// NOTE(review): extraction-garbled excerpt of the largest function in this
// pass; fused line numbers and many missing lines — not compilable as-is.
//
// Assigns each callsite-node clone to a function clone. Per original
// function: walks its metadata-bearing calls, processes the node and all its
// clones on a worklist, creating new function clones (cloneFunctionForCallsite)
// when there are more node clones than function clones, and reconciling
// caller->callee function-clone assignments (splitting node clones further
// when callers assigned to different function clones share one node clone).
// Finishes with a recursive UpdateCalls pass that rewrites allocation calls
// (updateAllocationCall) and callsite calls (updateCall).
3043template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3044bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3045 bool Changed =
false;
// Records which function clone each caller callsite node must call.
3053 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
3054 const FuncInfo &CalleeFunc) {
3056 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
3061 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
3062 FuncInfo OrigFunc(Func);
// Per-function-clone map of original call -> cloned call.
3066 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
3067 for (
auto &Call : CallsWithMetadata) {
3068 ContextNode *
Node = getNodeForInst(Call);
3075 "Not having a call should have prevented cloning");
3079 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
// Binds a callsite clone to a function clone and updates its (and its
// matching calls') CallInfo to the clone's version.
3083 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
3085 ContextNode *CallsiteClone,
3088 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3090 assert(FuncClonesToCallMap.count(FuncClone));
3091 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3093 if (CallMap.count(Call))
3094 CallClone = CallMap[
Call];
3095 CallsiteClone->setCall(CallClone);
3097 for (
auto &MatchingCall :
Node->MatchingCalls) {
3099 if (CallMap.count(MatchingCall))
3100 CallClone = CallMap[MatchingCall];
3102 MatchingCall = CallClone;
// Worklist seeded with the node (if live) and all its clones.
3109 std::deque<ContextNode *> ClonesWorklist;
3111 if (!
Node->emptyContextIds())
3112 ClonesWorklist.push_back(
Node);
3113 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
3114 Node->Clones.end());
3119 unsigned NodeCloneCount = 0;
3120 while (!ClonesWorklist.empty()) {
3121 ContextNode *Clone = ClonesWorklist.front();
3122 ClonesWorklist.pop_front();
3125 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
// Need a new function clone when node clones outnumber func clones.
3131 if (FuncClonesToCallMap.size() < NodeCloneCount) {
// First node clone uses the original function.
3133 if (NodeCloneCount == 1) {
3138 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3139 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3143 FuncClonesToCallMap[OrigFunc] = {};
3144 AssignCallsiteCloneToFuncClone(
3145 OrigFunc, Call, Clone,
3146 AllocationCallToContextNodeMap.count(Call));
3147 for (
auto &CE : Clone->CallerEdges) {
3149 if (!
CE->Caller->hasCall())
3151 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
// Subsequent node clones require creating a new function clone.
3161 FuncInfo PreviousAssignedFuncClone;
3163 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3164 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3166 bool CallerAssignedToCloneOfFunc =
false;
3167 if (EI != Clone->CallerEdges.end()) {
3168 const std::shared_ptr<ContextEdge> &Edge = *EI;
3169 PreviousAssignedFuncClone =
3170 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3171 CallerAssignedToCloneOfFunc =
true;
3176 std::map<CallInfo, CallInfo> NewCallMap;
3177 unsigned CloneNo = FuncClonesToCallMap.size();
3178 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3179 "should already exist in the map");
3180 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3181 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3182 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3183 FunctionClonesAnalysis++;
// Simple case: no caller pre-assigned; bind clone to the new func.
3189 if (!CallerAssignedToCloneOfFunc) {
3190 AssignCallsiteCloneToFuncClone(
3191 NewFuncClone, Call, Clone,
3192 AllocationCallToContextNodeMap.count(Call));
3193 for (
auto &CE : Clone->CallerEdges) {
3195 if (!
CE->Caller->hasCall())
3197 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// Harder case: reassign callers that were bound to the previously
// assigned function clone, moving their callee nodes into the new
// function clone as needed.
3206 for (
auto CE : Clone->CallerEdges) {
3211 if (!
CE->Caller->hasCall())
3214 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3218 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3219 PreviousAssignedFuncClone)
3222 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// All of this caller's callees must move into the new clone.
3232 for (
auto CalleeEdge :
CE->Caller->CalleeEdges) {
3237 ContextNode *
Callee = CalleeEdge->Callee;
3241 if (Callee == Clone || !
Callee->hasCall())
3243 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3244 removeNoneTypeCalleeEdges(NewClone);
3247 removeNoneTypeCalleeEdges(Callee);
// Propagate any existing callee-func assignment to the clone.
3252 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3253 RecordCalleeFuncOfCallsite(
3254 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
// Update the moved clone's call (and matching calls) to the new
// function clone's versions via its CallMap.
3263 OrigCall.setCloneNo(0);
3264 std::map<CallInfo, CallInfo> &CallMap =
3265 FuncClonesToCallMap[NewFuncClone];
3266 assert(CallMap.count(OrigCall));
3267 CallInfo NewCall(CallMap[OrigCall]);
3269 NewClone->setCall(NewCall);
3271 for (
auto &MatchingCall : NewClone->MatchingCalls) {
3272 CallInfo OrigMatchingCall(MatchingCall);
3273 OrigMatchingCall.setCloneNo(0);
3274 assert(CallMap.count(OrigMatchingCall));
3275 CallInfo NewCall(CallMap[OrigMatchingCall]);
3278 MatchingCall = NewCall;
// Reconciliation pass: ensure each caller's assigned function clone is
// consistent with this callsite clone, splitting the clone per function
// clone when callers disagree.
3301 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3302 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3305 for (
auto EI = Clone->CallerEdges.begin();
3306 EI != Clone->CallerEdges.end();) {
3309 if (!Edge->Caller->hasCall()) {
3315 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3316 FuncInfo FuncCloneCalledByCaller =
3317 CallsiteToCalleeFuncCloneMap[Edge->Caller];
// Conflict: caller calls a func clone other than the one this
// callsite clone is (or would be) assigned to.
3327 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3328 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3336 (FuncCloneAssignedToCurCallsiteClone &&
3337 FuncCloneAssignedToCurCallsiteClone !=
3338 FuncCloneCalledByCaller)) {
// Move this caller edge to the callsite clone matching its
// function clone, creating that callsite clone if needed.
3353 if (FuncCloneToNewCallsiteCloneMap.count(
3354 FuncCloneCalledByCaller)) {
3355 ContextNode *NewClone =
3356 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
3357 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
3359 removeNoneTypeCalleeEdges(NewClone);
3362 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
3363 removeNoneTypeCalleeEdges(NewClone);
3364 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
3367 ClonesWorklist.push_back(NewClone);
3368 assert(EI == Clone->CallerEdges.end() ||
3374 removeNoneTypeCalleeEdges(Clone);
// No conflict: adopt the caller's function clone if unassigned.
3383 if (!FuncCloneAssignedToCurCallsiteClone) {
3384 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
3386 AssignCallsiteCloneToFuncClone(
3387 FuncCloneCalledByCaller, Call, Clone,
3388 AllocationCallToContextNodeMap.count(Call));
3392 assert(FuncCloneAssignedToCurCallsiteClone ==
3393 FuncCloneCalledByCaller);
// Caller has no assignment yet: pick any func clone not already
// holding a clone of this node, and record it for the caller.
3402 if (!FuncCloneAssignedToCurCallsiteClone) {
3407 for (
auto &CF : FuncClonesToCallMap) {
3408 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
3409 FuncCloneAssignedToCurCallsiteClone = CF.first;
3413 assert(FuncCloneAssignedToCurCallsiteClone);
3415 AssignCallsiteCloneToFuncClone(
3416 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
3417 AllocationCallToContextNodeMap.count(Call));
3419 assert(FuncCloneToCurNodeCloneMap
3420 [FuncCloneAssignedToCurCallsiteClone] == Clone);
3422 RecordCalleeFuncOfCallsite(Edge->Caller,
3423 FuncCloneAssignedToCurCallsiteClone);
// Optional verification of the node, its clones, and their edges.
3430 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
3431 for (
const auto &PE :
Node->CalleeEdges)
3432 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3433 for (
const auto &CE :
Node->CallerEdges)
3434 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
3435 for (
auto *Clone :
Node->Clones) {
3436 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3437 for (
const auto &PE : Clone->CalleeEdges)
3438 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3439 for (
const auto &CE : Clone->CallerEdges)
3440 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
// Final recursive pass: rewrite allocation and callsite calls per the
// recorded assignments (recursing clones first, then callers).
3446 auto UpdateCalls = [&](ContextNode *
Node,
3448 auto &&UpdateCalls) {
3453 for (
auto *Clone :
Node->Clones)
3454 UpdateCalls(Clone, Visited, UpdateCalls);
3456 for (
auto &Edge :
Node->CallerEdges)
3457 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
3461 if (!
Node->hasCall() ||
Node->emptyContextIds())
3464 if (
Node->IsAllocation) {
3465 updateAllocationCall(
Node->Call, allocTypeToUse(
Node->AllocTypes));
3470 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
3473 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
3474 updateCall(
Node->Call, CalleeFunc);
3476 for (
auto &Call :
Node->MatchingCalls)
3477 updateCall(Call, CalleeFunc);
3486 for (
auto &Entry : AllocationCallToContextNodeMap)
3487 UpdateCalls(
Entry.second, Visited, UpdateCalls);
// NOTE(review): orphaned fragment — the enclosing function's header was lost
// in extraction; presumably the ThinLTO-backend function-clone creation
// helper (the statistics FunctionsClonedThinBackend/FunctionClonesThinBackend
// from the file header are incremented here).
//
// Visible behavior: creates NumClones-1 clones (clone 0 is the original),
// strips memprof/callsite metadata from each clone's instructions, resolves
// name collisions with pre-existing declarations (takeName + RAUW + erase),
// and mirrors any aliases of the original function onto each clone.
3501 FunctionsClonedThinBackend++;
3502 for (
unsigned I = 1;
I < NumClones;
I++) {
3503 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
3505 FunctionClonesThinBackend++;
// Clones must not carry the profile metadata of the original.
3508 for (
auto &BB : *NewF) {
3509 for (
auto &Inst : BB) {
3510 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
3511 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
// If a declaration with the clone's name already exists (imported
// earlier), replace it with the new definition.
3515 auto *PrevF = M.getFunction(
Name);
3519 assert(PrevF->isDeclaration());
3520 NewF->takeName(PrevF);
3521 PrevF->replaceAllUsesWith(NewF);
3522 PrevF->eraseFromParent();
3524 NewF->setName(
Name);
3526 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
// Also clone aliases pointing at the original function.
3529 if (!FuncToAliasMap.count(&
F))
3531 for (
auto *
A : FuncToAliasMap[&
F]) {
3533 auto *PrevA = M.getNamedAlias(
Name);
3535 A->getType()->getPointerAddressSpace(),
3536 A->getLinkage(),
Name, NewF);
3537 NewA->copyAttributesFrom(
A);
3541 assert(PrevA->isDeclaration());
3542 NewA->takeName(PrevA);
3543 PrevA->replaceAllUsesWith(NewA);
3544 PrevA->eraseFromParent();
3586bool MemProfContextDisambiguation::applyImport(
Module &M) {
3588 bool Changed =
false;
3590 auto IsMemProfClone = [](
const Function &
F) {
3597 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
3599 for (
auto &
A :
M.aliases()) {
3600 auto *Aliasee =
A.getAliaseeObject();
3601 if (
auto *
F = dyn_cast<Function>(Aliasee))
3602 FuncToAliasMap[
F].insert(&
A);
3606 if (
F.isDeclaration() || IsMemProfClone(
F))
3612 bool ClonesCreated =
false;
3613 unsigned NumClonesCreated = 0;
3614 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
3624 if (ClonesCreated) {
3625 assert(NumClonesCreated == NumClones);
3632 ClonesCreated =
true;
3633 NumClonesCreated = NumClones;
3639 CloneFuncIfNeeded(
StackNode.Clones.size());
3643 assert(!IsMemProfClone(*CalledFunction));
3648 auto CalleeOrigName = CalledFunction->getName();
3649 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
3654 auto NewF =
M.getOrInsertFunction(
3656 CalledFunction->getFunctionType());
3662 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3665 <<
ore::NV(
"Call", CBClone) <<
" in clone "
3667 <<
" assigned to call function clone "
3668 <<
ore::NV(
"Callee", NewF.getCallee()));
3686 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
3688 "enable-import-metadata is needed to emit thinlto_src_module");
3690 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
3692 if (GVS->modulePath() == SrcModule) {
3693 GVSummary = GVS.get();
3697 assert(GVSummary && GVSummary->modulePath() == SrcModule);
3702 if (isa<AliasSummary>(GVSummary))
3705 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
3707 if (
FS->allocs().empty() &&
FS->callsites().empty())
3710 auto SI =
FS->callsites().begin();
3711 auto AI =
FS->allocs().begin();
3719 for (
auto CallsiteIt =
FS->callsites().rbegin();
3720 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
3721 auto &Callsite = *CallsiteIt;
3725 if (!Callsite.StackIdIndices.empty())
3727 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
3733 for (
auto &BB :
F) {
3734 for (
auto &
I : BB) {
3735 auto *CB = dyn_cast<CallBase>(&
I);
3740 auto *CalledValue = CB->getCalledOperand();
3741 auto *CalledFunction = CB->getCalledFunction();
3742 if (CalledValue && !CalledFunction) {
3743 CalledValue = CalledValue->stripPointerCasts();
3745 CalledFunction = dyn_cast<Function>(CalledValue);
3749 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
3750 assert(!CalledFunction &&
3751 "Expected null called function in callsite for alias");
3752 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
3756 I.getMetadata(LLVMContext::MD_callsite));
3757 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
3761 if (CB->getAttributes().hasFnAttr(
"memprof")) {
3763 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3764 ? AllocTypeColdThinBackend++
3765 : AllocTypeNotColdThinBackend++;
3766 OrigAllocsThinBackend++;
3767 AllocVersionsThinBackend++;
3768 if (!MaxAllocVersionsThinBackend)
3769 MaxAllocVersionsThinBackend = 1;
3772 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3779 auto &AllocNode = *(AI++);
3783 auto MIBIter = AllocNode.MIBs.begin();
3784 for (
auto &MDOp : MemProfMD->operands()) {
3785 assert(MIBIter != AllocNode.MIBs.end());
3787 MIBIter->StackIdIndices.begin();
3788 auto *MIBMD = cast<const MDNode>(MDOp);
3792 auto ContextIterBegin =
3796 (ContextIterBegin != StackContext.
end() &&
3797 *ContextIterBegin == 0)
3800 for (
auto ContextIter = ContextIterBegin;
3801 ContextIter != StackContext.
end(); ++ContextIter) {
3805 if (LastStackContextId == *ContextIter)
3807 LastStackContextId = *ContextIter;
3808 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
3817 CloneFuncIfNeeded(AllocNode.Versions.size());
3819 OrigAllocsThinBackend++;
3820 AllocVersionsThinBackend += AllocNode.Versions.size();
3821 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
3822 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
3829 if (AllocNode.Versions.size() == 1) {
3834 UnclonableAllocsThinBackend++;
3840 return Type == ((uint8_t)AllocationType::NotCold |
3841 (uint8_t)AllocationType::Cold);
3845 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
3851 : AllocTypeNotColdThinBackend++;
3863 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3866 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
3868 <<
" marked with memprof allocation attribute "
3869 <<
ore::NV(
"Attribute", AllocTypeString));
3871 }
else if (!CallsiteContext.empty()) {
3873 assert(SI !=
FS->callsites().end());
3879 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
3880 for (
auto StackId : CallsiteContext) {
3888 CloneCallsite(
StackNode, CB, CalledFunction);
3889 }
else if (CB->isTailCall()) {
3894 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
3895 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
3896 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
3897 CloneCallsite(Callsite->second, CB, CalledFunction);
3901 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
3902 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3910template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3911bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
3913 dbgs() <<
"CCG before cloning:\n";
3917 exportToDot(
"postbuild");
3930 dbgs() <<
"CCG after cloning:\n";
3934 exportToDot(
"cloned");
3936 bool Changed = assignFunctions();
3939 dbgs() <<
"CCG after assigning function clones:\n";
3943 exportToDot(
"clonefuncassign");
3946 printTotalSizes(
errs());
3951bool MemProfContextDisambiguation::processModule(
3958 return applyImport(M);
3971 ModuleCallsiteContextGraph CCG(M, OREGetter);
3972 return CCG.process();
3977 : ImportSummary(Summary) {
3978 if (ImportSummary) {
3988 auto ReadSummaryFile =
3990 if (!ReadSummaryFile) {
3997 if (!ImportSummaryForTestingOrErr) {
4003 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
4004 ImportSummary = ImportSummaryForTesting.get();
4013 if (!processModule(M, OREGetter))
4030 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
std::pair< KeyT, ValueT > & back()
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
StringRef AttributeString(unsigned Attribute)
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
uint64_t getMIBTotalSize(const MDNode *MIB)
Returns the total size from an MIB metadata node, or 0 if it was not recorded.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const