49#include <unordered_map>
54#define DEBUG_TYPE "memprof-context-disambiguation"
57 "Number of function clones created during whole program analysis");
59 "Number of function clones created during ThinLTO backend");
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
67 "Number of not cold static allocations (possibly cloned) during "
69STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
81 "Number of unclonable ambigous allocations during ThinLTO backend");
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
85 "Number of profiled callees found via tail calls");
87 "Aggregate depth of profiled callees found via tail calls");
89 "Maximum depth of profiled callees found via tail calls");
91 "Number of profiled callees found via multiple tail call chains");
96 cl::desc(
"Specify the path prefix of the MemProf dot files."));
100 cl::desc(
"Export graph to dot files."));
104 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
108 cl::desc(
"Perform verification checks on CallingContextGraph."));
112 cl::desc(
"Perform frequent verification checks on nodes."));
115 "memprof-import-summary",
116 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
122 cl::desc(
"Max depth to recursively search for missing "
123 "frames through tail calls."));
134 cl::desc(
"Linking with hot/cold operator new interfaces"));
154template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
155class CallsiteContextGraph {
157 CallsiteContextGraph() =
default;
158 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
159 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
166 void identifyClones();
173 bool assignFunctions();
180 const CallsiteContextGraph &CCG) {
186 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
188 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
190 void exportToDot(std::string Label)
const;
193 struct FuncInfo final
194 :
public std::pair<FuncTy *, unsigned > {
195 using Base = std::pair<FuncTy *, unsigned>;
196 FuncInfo(
const Base &
B) :
Base(
B) {}
197 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
198 explicit operator bool()
const {
return this->first !=
nullptr; }
199 FuncTy *func()
const {
return this->first; }
200 unsigned cloneNo()
const {
return this->second; }
204 struct CallInfo final :
public std::pair<CallTy, unsigned > {
205 using Base = std::pair<CallTy, unsigned>;
207 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
209 explicit operator bool()
const {
return (
bool)this->first; }
210 CallTy call()
const {
return this->first; }
211 unsigned cloneNo()
const {
return this->second; }
212 void setCloneNo(
unsigned N) { this->second =
N; }
214 if (!
operator bool()) {
220 OS <<
"\t(clone " << cloneNo() <<
")";
243 bool Recursive =
false;
259 uint8_t AllocTypes = 0;
263 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
267 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
271 const std::vector<std::shared_ptr<ContextEdge>> *
272 getEdgesWithAllocInfo()
const {
275 if (!CalleeEdges.empty())
277 if (!CallerEdges.empty()) {
290 auto *Edges = getEdgesWithAllocInfo();
294 for (
auto &Edge : *Edges)
295 Count += Edge->getContextIds().size();
297 for (
auto &Edge : *Edges)
298 ContextIds.
insert(Edge->getContextIds().begin(),
299 Edge->getContextIds().end());
305 uint8_t computeAllocType()
const {
306 auto *Edges = getEdgesWithAllocInfo();
308 return (uint8_t)AllocationType::None;
310 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
311 uint8_t
AllocType = (uint8_t)AllocationType::None;
312 for (
auto &Edge : *Edges) {
323 bool emptyContextIds()
const {
324 auto *Edges = getEdgesWithAllocInfo();
327 for (
auto &Edge : *Edges) {
328 if (!Edge->getContextIds().empty())
335 std::vector<ContextNode *> Clones;
338 ContextNode *CloneOf =
nullptr;
340 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
342 ContextNode(
bool IsAllocation,
CallInfo C)
343 : IsAllocation(IsAllocation),
Call(
C) {}
345 void addClone(ContextNode *Clone) {
347 CloneOf->Clones.push_back(Clone);
348 Clone->CloneOf = CloneOf;
350 Clones.push_back(Clone);
352 Clone->CloneOf =
this;
356 ContextNode *getOrigNode() {
363 unsigned int ContextId);
365 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
366 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
367 void eraseCalleeEdge(
const ContextEdge *Edge);
368 void eraseCallerEdge(
const ContextEdge *Edge);
372 bool hasCall()
const {
return (
bool)
Call.call(); }
378 bool isRemoved()
const {
379 assert((AllocTypes == (uint8_t)AllocationType::None) ==
381 return AllocTypes == (uint8_t)AllocationType::None;
401 uint8_t AllocTypes = 0;
406 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t
AllocType,
409 ContextIds(
std::
move(ContextIds)) {}
424 void removeNoneTypeCalleeEdges(ContextNode *
Node);
426 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
432 template <
class NodeT,
class IteratorT>
433 std::vector<uint64_t>
438 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
441 template <
class NodeT,
class IteratorT>
442 void addStackNodesForMIB(ContextNode *AllocNode,
450 void updateStackNodes();
454 void handleCallsitesWithMultipleTargets();
461 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
464 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
466 using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
475 void assignStackNodesPostOrder(
487 void propagateDuplicateContextIds(
493 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
501 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
511 calleesMatch(CallTy Call, EdgeIter &EI,
517 bool calleeMatchesFunc(
518 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
519 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
520 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
521 Call, Func, CallerFunc, FoundCalleeChain);
526 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
527 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
532 uint64_t getLastStackId(CallTy Call) {
533 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
538 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
539 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
544 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
545 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
551 FuncInfo cloneFunctionForCallsite(
552 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
553 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
554 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
555 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
560 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
561 unsigned CloneNo)
const {
562 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
566 ContextNode *getNodeForInst(
const CallInfo &
C);
567 ContextNode *getNodeForAlloc(
const CallInfo &
C);
568 ContextNode *getNodeForStackId(
uint64_t StackId);
591 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
592 EdgeIter *CallerEdgeI =
nullptr,
600 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
601 ContextNode *NewCallee,
602 EdgeIter *CallerEdgeI =
nullptr,
603 bool NewClone =
false,
631 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
637 unsigned int LastContextId = 0;
640template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
642 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
643template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
645 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
646template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
648 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
649template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
651 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
654class ModuleCallsiteContextGraph
655 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
658 ModuleCallsiteContextGraph(
663 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
667 bool calleeMatchesFunc(
669 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
670 bool findProfiledCalleeThroughTailCalls(
672 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
673 bool &FoundMultipleCalleeChains);
675 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
677 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
678 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
680 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
681 std::map<CallInfo, CallInfo> &CallMap,
682 std::vector<CallInfo> &CallsWithMetadataInFunc,
685 unsigned CloneNo)
const;
694struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
696 IndexCall(std::nullptr_t) : IndexCall() {}
701 IndexCall *operator->() {
return this; }
706 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
709 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
717class IndexCallsiteContextGraph
718 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
721 IndexCallsiteContextGraph(
726 ~IndexCallsiteContextGraph() {
731 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
733 for (
auto &Callsite :
I.second)
734 FS->addCallsite(*Callsite.second);
739 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
743 bool calleeMatchesFunc(
746 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
747 bool findProfiledCalleeThroughTailCalls(
749 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
750 bool &FoundMultipleCalleeChains);
751 uint64_t getLastStackId(IndexCall &Call);
752 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
754 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
757 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
758 std::map<CallInfo, CallInfo> &CallMap,
759 std::vector<CallInfo> &CallsWithMetadataInFunc,
761 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
762 unsigned CloneNo)
const;
766 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
777 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
778 FunctionCalleesToSynthesizedCallsiteInfos;
790 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
793 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
798struct FieldSeparator {
802 FieldSeparator(
const char *Sep =
", ") : Sep(Sep) {}
819 assert(AllocTypes != (uint8_t)AllocationType::None);
821 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
822 return AllocationType::NotCold;
830template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
832 const std::vector<uint8_t> &InAllocTypes,
833 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
836 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
838 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
842 if (l == (uint8_t)AllocationType::None ||
843 r->AllocTypes == (uint8_t)AllocationType::None)
845 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
851template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
852typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
853CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
855 ContextNode *
Node = getNodeForAlloc(
C);
859 return NonAllocationCallToContextNodeMap.lookup(
C);
862template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
863typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
864CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
866 return AllocationCallToContextNodeMap.lookup(
C);
869template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
870typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
871CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
873 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
874 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
875 return StackEntryNode->second;
879template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
880void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
882 unsigned int ContextId) {
883 for (
auto &Edge : CallerEdges) {
884 if (Edge->Caller == Caller) {
886 Edge->getContextIds().insert(ContextId);
890 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
892 CallerEdges.push_back(Edge);
893 Caller->CalleeEdges.push_back(Edge);
896template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
897void CallsiteContextGraph<
898 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
899 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
901 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
902 assert(Edge->ContextIds.empty());
903 Edge->Callee->eraseCallerEdge(Edge.get());
904 EI =
Node->CalleeEdges.erase(EI);
910template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
911typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
912CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
913 findEdgeFromCallee(
const ContextNode *Callee) {
914 for (
const auto &Edge : CalleeEdges)
915 if (Edge->Callee == Callee)
920template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
921typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
922CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
923 findEdgeFromCaller(
const ContextNode *Caller) {
924 for (
const auto &Edge : CallerEdges)
925 if (Edge->Caller == Caller)
930template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
931void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
932 eraseCalleeEdge(
const ContextEdge *Edge) {
934 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
935 return CalleeEdge.get() == Edge;
937 assert(EI != CalleeEdges.end());
938 CalleeEdges.erase(EI);
941template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
942void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
943 eraseCallerEdge(
const ContextEdge *Edge) {
945 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
946 return CallerEdge.get() == Edge;
948 assert(EI != CallerEdges.end());
949 CallerEdges.erase(EI);
952template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
956 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
957 uint8_t
AllocType = (uint8_t)AllocationType::None;
958 for (
auto Id : ContextIds) {
959 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
967template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
969CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
972 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
973 uint8_t
AllocType = (uint8_t)AllocationType::None;
974 for (
auto Id : Node1Ids) {
975 if (!Node2Ids.
count(Id))
977 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
985template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
986uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
988 if (Node1Ids.
size() < Node2Ids.
size())
989 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
991 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
994template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
995typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
996CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
998 assert(!getNodeForAlloc(Call));
1000 std::make_unique<ContextNode>(
true, Call));
1001 ContextNode *AllocNode = NodeOwner.back().get();
1002 AllocationCallToContextNodeMap[
Call] = AllocNode;
1003 NodeToCallingFunc[AllocNode] =
F;
1005 AllocNode->OrigStackOrAllocId = LastContextId;
1008 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1017 if (AllocTypes & (uint8_t)AllocationType::NotCold)
1019 if (AllocTypes & (uint8_t)AllocationType::Cold)
1024template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1025template <
class NodeT,
class IteratorT>
1026void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1036 ContextIdToAllocationType[++LastContextId] =
AllocType;
1040 ContextIdToTotalSize[LastContextId] = TotalSize;
1044 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1049 ContextNode *PrevNode = AllocNode;
1056 ContextIter != StackContext.
end(); ++ContextIter) {
1057 auto StackId = getStackId(*ContextIter);
1058 ContextNode *
StackNode = getNodeForStackId(StackId);
1060 NodeOwner.push_back(
1061 std::make_unique<ContextNode>(
false));
1063 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1064 StackNode->OrigStackOrAllocId = StackId;
1075template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1077CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1081 for (
auto OldId : StackSequenceContextIds) {
1082 NewContextIds.
insert(++LastContextId);
1083 OldToNewContextIds[OldId].insert(LastContextId);
1084 assert(ContextIdToAllocationType.count(OldId));
1086 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1090 ContextIdToTotalSize[LastContextId] = 0;
1092 return NewContextIds;
1095template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1096void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1097 propagateDuplicateContextIds(
1102 for (
auto Id : ContextIds)
1103 if (
auto NewId = OldToNewContextIds.find(Id);
1104 NewId != OldToNewContextIds.end())
1105 NewIds.
insert(NewId->second.begin(), NewId->second.end());
1110 auto UpdateCallers = [&](ContextNode *
Node,
1112 auto &&UpdateCallers) ->
void {
1113 for (
const auto &Edge :
Node->CallerEdges) {
1114 auto Inserted = Visited.insert(Edge.get());
1117 ContextNode *NextNode = Edge->Caller;
1121 if (!NewIdsToAdd.
empty()) {
1122 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1123 UpdateCallers(NextNode, Visited, UpdateCallers);
1129 for (
auto &Entry : AllocationCallToContextNodeMap) {
1131 UpdateCallers(
Node, Visited, UpdateCallers);
1135template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1136void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1137 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1142 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1144 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1150 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1151 NotFoundContextIds);
1152 RemainingContextIds.
swap(NotFoundContextIds);
1154 if (NewEdgeContextIds.
empty()) {
1158 if (TowardsCallee) {
1159 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1160 auto NewEdge = std::make_shared<ContextEdge>(
1161 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1162 NewNode->CalleeEdges.push_back(NewEdge);
1163 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1165 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1166 auto NewEdge = std::make_shared<ContextEdge>(
1167 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1168 NewNode->CallerEdges.push_back(NewEdge);
1169 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1172 if (Edge->getContextIds().empty()) {
1173 if (TowardsCallee) {
1174 Edge->Callee->eraseCallerEdge(Edge.get());
1175 EI = OrigNode->CalleeEdges.erase(EI);
1177 Edge->Caller->eraseCalleeEdge(Edge.get());
1178 EI = OrigNode->CallerEdges.erase(EI);
1186template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1188 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1191 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1192 assert(!Edge->ContextIds.empty());
1195template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1197 bool CheckEdges =
true) {
1198 if (
Node->isRemoved())
1202 auto NodeContextIds =
Node->getContextIds();
1206 if (
Node->CallerEdges.size()) {
1208 Node->CallerEdges.front()->ContextIds);
1211 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1212 set_union(CallerEdgeContextIds, Edge->ContextIds);
1216 assert(NodeContextIds == CallerEdgeContextIds ||
1219 if (
Node->CalleeEdges.size()) {
1221 Node->CalleeEdges.front()->ContextIds);
1224 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1225 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1227 assert(NodeContextIds == CalleeEdgeContextIds);
1231template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1232void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1233 assignStackNodesPostOrder(ContextNode *
Node,
1236 &StackIdToMatchingCalls) {
1244 auto CallerEdges =
Node->CallerEdges;
1245 for (
auto &Edge : CallerEdges) {
1249 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
1258 if (
Node->IsAllocation ||
1259 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1262 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1266 if (Calls.size() == 1) {
1267 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1268 if (Ids.size() == 1) {
1269 assert(SavedContextIds.empty());
1272 if (
Node->Recursive)
1274 Node->setCall(Call);
1275 NonAllocationCallToContextNodeMap[
Call] =
Node;
1284 ContextNode *LastNode = getNodeForStackId(LastId);
1288 for (
unsigned I = 0;
I < Calls.size();
I++) {
1289 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1292 if (SavedContextIds.empty())
1295 assert(LastId == Ids.back());
1297 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1307 ContextNode *PrevNode =
nullptr;
1308 for (
auto Id : Ids) {
1309 ContextNode *CurNode = getNodeForStackId(Id);
1313 assert(!CurNode->Recursive);
1318 auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
1320 SavedContextIds.clear();
1327 if (SavedContextIds.empty())
1330 if (SavedContextIds.empty())
1334 NodeOwner.push_back(
1335 std::make_unique<ContextNode>(
false, Call));
1336 ContextNode *NewNode = NodeOwner.back().get();
1337 NodeToCallingFunc[NewNode] =
Func;
1338 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1339 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1344 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1349 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1354 for (
auto Id : Ids) {
1355 ContextNode *CurNode = getNodeForStackId(Id);
1362 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1364 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1365 if (PrevEdge->getContextIds().empty()) {
1366 PrevNode->eraseCallerEdge(PrevEdge);
1367 CurNode->eraseCalleeEdge(PrevEdge);
1373 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1374 ? (uint8_t)AllocationType::None
1375 : CurNode->computeAllocType();
1379 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1380 for (
auto Id : Ids) {
1381 ContextNode *CurNode = getNodeForStackId(Id);
1384 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
1390template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1391void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1400 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1401 for (
auto &Call : CallsWithMetadata) {
1403 if (AllocationCallToContextNodeMap.count(Call))
1405 auto StackIdsWithContextNodes =
1406 getStackIdsWithContextNodesForCall(
Call.call());
1409 if (StackIdsWithContextNodes.empty())
1413 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1414 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1425 for (
auto &It : StackIdToMatchingCalls) {
1426 auto &Calls = It.getSecond();
1428 if (Calls.size() == 1) {
1429 auto &Ids = std::get<1>(Calls[0]);
1430 if (Ids.size() == 1)
1439 std::stable_sort(Calls.begin(), Calls.end(),
1440 [](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1441 auto &IdsA = std::get<1>(A);
1442 auto &IdsB = std::get<1>(B);
1443 return IdsA.size() > IdsB.size() ||
1444 (IdsA.size() == IdsB.size() && IdsA < IdsB);
1451 ContextNode *LastNode = getNodeForStackId(LastId);
1455 if (LastNode->Recursive)
1463 for (
unsigned I = 0;
I < Calls.size();
I++) {
1464 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1465 assert(SavedContextIds.empty());
1466 assert(LastId == Ids.back());
1474 ContextNode *PrevNode = LastNode;
1475 ContextNode *CurNode = LastNode;
1480 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1482 CurNode = getNodeForStackId(Id);
1486 if (CurNode->Recursive) {
1491 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1509 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1512 if (StackSequenceContextIds.
empty()) {
1525 if (Ids.back() != getLastStackId(Call)) {
1526 for (
const auto &PE : LastNode->CallerEdges) {
1527 set_subtract(StackSequenceContextIds, PE->getContextIds());
1528 if (StackSequenceContextIds.
empty())
1532 if (StackSequenceContextIds.
empty())
1538 bool DuplicateContextIds =
false;
1539 if (
I + 1 < Calls.size()) {
1540 auto NextIds = std::get<1>(Calls[
I + 1]);
1541 DuplicateContextIds = Ids == NextIds;
1550 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1551 StackSequenceContextIds.
size());
1554 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1555 : StackSequenceContextIds;
1556 assert(!SavedContextIds.empty());
1558 if (!DuplicateContextIds) {
1562 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1563 if (LastNodeContextIds.
empty())
1570 propagateDuplicateContextIds(OldToNewContextIds);
1581 for (
auto &Entry : AllocationCallToContextNodeMap)
1582 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls);
1589 Call->getMetadata(LLVMContext::MD_callsite));
1590 return CallsiteContext.
back();
1593uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1596 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1598 return Index.getStackIdAtIndex(CallsiteContext.
back());
1611std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1613 unsigned CloneNo)
const {
1614 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1615 cast<CallBase>(Call)->getCalledFunction()->getName())
1619std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1620 const IndexCall &Call,
1621 unsigned CloneNo)
const {
1622 auto VI = FSToVIMap.find(Func);
1623 assert(VI != FSToVIMap.end());
1624 if (isa<AllocInfo *>(
Call.getBase()))
1625 return (
VI->second.name() +
" -> alloc").str();
1627 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(
Call.getBase());
1628 return (
VI->second.name() +
" -> " +
1630 Callsite->Clones[CloneNo]))
1635std::vector<uint64_t>
1636ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1639 Call->getMetadata(LLVMContext::MD_callsite));
1640 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1644std::vector<uint64_t>
1645IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1648 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1654template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1655template <
class NodeT,
class IteratorT>
1656std::vector<uint64_t>
1657CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1659 std::vector<uint64_t> StackIds;
1660 for (
auto IdOrIndex : CallsiteContext) {
1661 auto StackId = getStackId(IdOrIndex);
1662 ContextNode *
Node = getNodeForStackId(StackId);
1665 StackIds.push_back(StackId);
1670ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1673 :
Mod(
M), OREGetter(OREGetter) {
1675 std::vector<CallInfo> CallsWithMetadata;
1676 for (
auto &BB :
F) {
1677 for (
auto &
I : BB) {
1678 if (!isa<CallBase>(
I))
1680 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1681 CallsWithMetadata.push_back(&
I);
1682 auto *AllocNode = addAllocNode(&
I, &
F);
1683 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
1687 for (
auto &MDOp : MemProfMD->operands()) {
1688 auto *MIBMD = cast<const MDNode>(MDOp);
1692 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1693 AllocNode, StackContext, CallsiteContext,
1696 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1699 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
1700 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
1703 else if (
I.getMetadata(LLVMContext::MD_callsite))
1704 CallsWithMetadata.push_back(&
I);
1707 if (!CallsWithMetadata.empty())
1708 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
1712 dbgs() <<
"CCG before updating call stack chains:\n";
1717 exportToDot(
"prestackupdate");
1721 handleCallsitesWithMultipleTargets();
1724 for (
auto &FuncEntry : FuncToCallsWithMetadata)
1725 for (
auto &Call : FuncEntry.second)
1726 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
1729IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1736 for (
auto &S :
VI.getSummaryList()) {
1745 !isPrevailing(
VI.getGUID(), S.get()))
1747 auto *
FS = dyn_cast<FunctionSummary>(S.get());
1750 std::vector<CallInfo> CallsWithMetadata;
1751 if (!
FS->allocs().empty()) {
1752 for (
auto &AN :
FS->mutableAllocs()) {
1757 if (AN.MIBs.empty())
1759 CallsWithMetadata.push_back({&AN});
1760 auto *AllocNode = addAllocNode({&AN},
FS);
1768 AN.TotalSizes.size() == AN.MIBs.size());
1770 for (
auto &MIB : AN.MIBs) {
1775 TotalSize = AN.TotalSizes[
I];
1776 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1777 AllocNode, StackContext, EmptyContext, MIB.AllocType,
1781 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1786 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
1790 if (!
FS->callsites().empty())
1791 for (
auto &SN :
FS->mutableCallsites())
1792 CallsWithMetadata.push_back({&SN});
1794 if (!CallsWithMetadata.empty())
1795 FuncToCallsWithMetadata[FS] = CallsWithMetadata;
1797 if (!
FS->allocs().empty() || !
FS->callsites().empty())
1803 dbgs() <<
"CCG before updating call stack chains:\n";
1808 exportToDot(
"prestackupdate");
1812 handleCallsitesWithMultipleTargets();
// Walks non-allocation callsite nodes and removes callee edges whose profiled
// callee does not match the IR callee (counted in
// RemovedEdgesWithMismatchedCallees); nodes found via tail-call chains are
// merged back into the map at the end.
// NOTE(review): lossy listing — some plain lines are missing from this view.
1815template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1816void CallsiteContextGraph<DerivedCCG, FuncTy,
1817 CallTy>::handleCallsitesWithMultipleTargets() {
1832 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
1837 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
1840 if (!Edge->Callee->hasCall())
1842 assert(NodeToCallingFunc.count(Edge->Callee));
// calleesMatch may splice in tail-call nodes; on mismatch the edge is dropped.
1844 if (calleesMatch(Call, EI, TailCallToContextNodeMap))
1846 RemovedEdgesWithMismatchedCallees++;
// Prune map entries whose node no longer has a call.
1859 NonAllocationCallToContextNodeMap.remove_if(
1860 [](
const auto &it) {
return !it.second->hasCall(); });
// Fold newly synthesized tail-call nodes into the callsite map.
1864 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
1865 NonAllocationCallToContextNodeMap[
Call] =
Node;
// Fragment: looks like the body of IndexCallsiteContextGraph::getStackId,
// mapping a stack id index through the summary index — TODO confirm, the
// enclosing signature was dropped by the extraction.
1876 return Index.getStackIdAtIndex(IdOrIndex);
// Checks whether the IR callee of Call matches the profiled callee on the
// edge at EI; if the match goes through a chain of tail calls, synthesizes
// intermediate context nodes/edges and rewires the graph around the original
// edge (which is erased at the end).
// NOTE(review): lossy listing — some plain lines are missing from this view.
1879template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1880bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
1881 CallTy Call, EdgeIter &EI,
1884 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
1885 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
1888 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
1889 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
1894 if (FoundCalleeChain.empty())
// Helper: add (or merge into) an edge Caller->Callee carrying the original
// edge's context ids and alloc types, keeping EI valid after insertion.
1897 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
1898 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
1902 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
1903 Edge->ContextIds.end());
1904 CurEdge->AllocTypes |= Edge->AllocTypes;
1909 auto NewEdge = std::make_shared<ContextEdge>(
1910 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
1911 Callee->CallerEdges.push_back(NewEdge);
1912 if (Caller == Edge->Caller) {
1916 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
1919 "Iterator position not restored after insert and increment");
1921 Caller->CalleeEdges.push_back(NewEdge);
// Thread new nodes for each discovered tail-call frame between the
// original callee and caller.
1926 auto *CurCalleeNode = Edge->Callee;
1927 for (
auto &[NewCall, Func] : FoundCalleeChain) {
1928 ContextNode *NewNode =
nullptr;
1930 if (TailCallToContextNodeMap.
count(NewCall)) {
1931 NewNode = TailCallToContextNodeMap[NewCall];
1932 NewNode->AllocTypes |= Edge->AllocTypes;
1934 FuncToCallsWithMetadata[
Func].push_back({NewCall});
1936 NodeOwner.push_back(
1937 std::make_unique<ContextNode>(
false, NewCall));
1938 NewNode = NodeOwner.
back().get();
1939 NodeToCallingFunc[NewNode] =
Func;
1940 TailCallToContextNodeMap[NewCall] = NewNode;
1941 NewNode->AllocTypes = Edge->AllocTypes;
1945 AddEdge(NewNode, CurCalleeNode);
1947 CurCalleeNode = NewNode;
1951 AddEdge(Edge->Caller, CurCalleeNode);
// The original (mismatched) edge is now redundant — remove both directions.
1954 Edge->Callee->eraseCallerEdge(Edge.get());
1955 EI = Edge->Caller->CalleeEdges.erase(EI);
1961 assert(!Edge->Caller->CalleeEdges.empty());
// IR (module) flavor: recursively searches tail calls in CurCallee's body for
// a unique chain reaching ProfiledCallee, recording the chain in
// FoundCalleeChain and updating the FoundProfiledCallee* statistics. Sets
// FoundMultipleCalleeChains when more than one chain is found.
// NOTE(review): lossy listing — some plain lines are missing from this view.
1967bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
1969 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
1970 bool &FoundMultipleCalleeChains) {
1977 FoundCalleeChain.push_back({Callsite,
F});
1980 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
// Look through a global alias to its aliasee function.
1982 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
1984 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
1992 bool FoundSingleCalleeChain =
false;
1993 for (
auto &BB : *CalleeFunc) {
1994 for (
auto &
I : BB) {
1995 auto *CB = dyn_cast<CallBase>(&
I);
// Only tail calls can hide frames missing from the profiled stack.
1996 if (!CB || !CB->isTailCall())
1998 auto *CalledValue = CB->getCalledOperand();
1999 auto *CalledFunction = CB->getCalledFunction();
2000 if (CalledValue && !CalledFunction) {
2001 CalledValue = CalledValue->stripPointerCasts();
2003 CalledFunction = dyn_cast<Function>(CalledValue);
2007 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2008 assert(!CalledFunction &&
2009 "Expected null called function in callsite for alias");
2010 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2012 if (!CalledFunction)
2014 if (CalledFunction == ProfiledCallee) {
// A second hit means the chain is ambiguous.
2015 if (FoundSingleCalleeChain) {
2016 FoundMultipleCalleeChains =
true;
2019 FoundSingleCalleeChain =
true;
2020 FoundProfiledCalleeCount++;
2021 FoundProfiledCalleeDepth +=
Depth;
2022 if (
Depth > FoundProfiledCalleeMaxDepth)
2023 FoundProfiledCalleeMaxDepth =
Depth;
2024 SaveCallsiteInfo(&
I, CalleeFunc);
2025 }
else if (findProfiledCalleeThroughTailCalls(
2026 ProfiledCallee, CalledFunction,
Depth + 1,
2027 FoundCalleeChain, FoundMultipleCalleeChains)) {
2030 assert(!FoundMultipleCalleeChains);
2031 if (FoundSingleCalleeChain) {
2032 FoundMultipleCalleeChains =
true;
2035 FoundSingleCalleeChain =
true;
2036 SaveCallsiteInfo(&
I, CalleeFunc);
2037 }
else if (FoundMultipleCalleeChains)
2042 return FoundSingleCalleeChain;
// IR (module) flavor: returns whether the call's IR callee is Func, either
// directly, via an alias, or via a unique chain of tail calls (searched with
// findProfiledCalleeThroughTailCalls).
// NOTE(review): lossy listing — some plain lines are missing from this view.
2045bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2047 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2048 auto *CB = dyn_cast<CallBase>(Call);
// Indirect calls cannot be matched against a profiled callee.
2049 if (!CB->getCalledOperand() || CB->isIndirectCall())
2051 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2052 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2053 if (CalleeFunc == Func)
// An alias to Func also counts as a direct match.
2055 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2056 if (Alias && Alias->getAliasee() == Func)
2067 bool FoundMultipleCalleeChains =
false;
2068 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2070 FoundMultipleCalleeChains)) {
2071 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2072 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2073 <<
" that actually called " << CalleeVal->getName()
2074 << (FoundMultipleCalleeChains
2075 ?
" (found multiple possible chains)"
2078 if (FoundMultipleCalleeChains)
2079 FoundProfiledCalleeNonUniquelyCount++;
// Summary-index flavor: searches FunctionSummary call edges marked hasTailCall
// for a unique chain reaching ProfiledCallee. Synthesized CallsiteInfos are
// cached in FunctionCalleesToSynthesizedCallsiteInfos so repeat queries reuse
// them; FSToVIMap records which ValueInfo each summary corresponds to.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2086bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2088 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2089 bool &FoundMultipleCalleeChains) {
// Create (or reuse) a synthesized callsite record for FS->Callee.
2098 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2099 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2102 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2105 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2106 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2113 bool FoundSingleCalleeChain =
false;
// Only consider the prevailing summary for the current callee.
2116 !isPrevailing(CurCallee.
getGUID(), S.get()))
2118 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2121 auto FSVI = CurCallee;
2122 auto *AS = dyn_cast<AliasSummary>(S.get());
2124 FSVI = AS->getAliaseeVI();
2125 for (
auto &CallEdge :
FS->calls()) {
2126 if (!CallEdge.second.hasTailCall())
2128 if (CallEdge.first == ProfiledCallee) {
2129 if (FoundSingleCalleeChain) {
2130 FoundMultipleCalleeChains =
true;
2133 FoundSingleCalleeChain =
true;
2134 FoundProfiledCalleeCount++;
2135 FoundProfiledCalleeDepth +=
Depth;
2136 if (
Depth > FoundProfiledCalleeMaxDepth)
2137 FoundProfiledCalleeMaxDepth =
Depth;
2138 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2140 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2141 FSToVIMap[
FS] = FSVI;
2142 }
else if (findProfiledCalleeThroughTailCalls(
2143 ProfiledCallee, CallEdge.first,
Depth + 1,
2144 FoundCalleeChain, FoundMultipleCalleeChains)) {
2147 assert(!FoundMultipleCalleeChains);
2148 if (FoundSingleCalleeChain) {
2149 FoundMultipleCalleeChains =
true;
2152 FoundSingleCalleeChain =
true;
2153 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2155 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2156 FSToVIMap[
FS] = FSVI;
2157 }
else if (FoundMultipleCalleeChains)
2162 return FoundSingleCalleeChain;
// Summary-index flavor: matches the recorded callee ValueInfo of a callsite
// against Func's ValueInfo (via FSToVIMap), falling back to a unique
// tail-call chain search when they differ.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2165bool IndexCallsiteContextGraph::calleeMatchesFunc(
2168 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2170 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
// Look through an alias summary on the callee, if any.
2174 Callee.getSummaryList().empty()
2176 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2177 assert(FSToVIMap.count(Func));
2178 auto FuncVI = FSToVIMap[
Func];
2179 if (Callee == FuncVI ||
2194 bool FoundMultipleCalleeChains =
false;
2195 if (!findProfiledCalleeThroughTailCalls(
2196 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2197 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2198 <<
" from " << FSToVIMap[CallerFunc]
2199 <<
" that actually called " << Callee
2200 << (FoundMultipleCalleeChains
2201 ?
" (found multiple possible chains)"
2204 if (FoundMultipleCalleeChains)
2205 FoundProfiledCalleeNonUniquelyCount++;
// Debug dump of a single context node (body elided in this listing).
2212template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2213void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// Prints a context node: address, recursion flag, sorted context ids, callee
// and caller edges, and clone relationships.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2219template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2220void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2222 OS <<
"Node " <<
this <<
"\n";
2226 OS <<
" (recursive)";
2229 OS <<
"\tContextIds:";
// Copy and sort the ids so output is deterministic.
2231 auto ContextIds = getContextIds();
2232 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2233 std::sort(SortedIds.begin(), SortedIds.end());
2234 for (
auto Id : SortedIds)
2237 OS <<
"\tCalleeEdges:\n";
2238 for (
auto &Edge : CalleeEdges)
2239 OS <<
"\t\t" << *Edge <<
"\n";
2240 OS <<
"\tCallerEdges:\n";
2241 for (
auto &Edge : CallerEdges)
2242 OS <<
"\t\t" << *Edge <<
"\n";
2243 if (!Clones.empty()) {
2246 for (
auto *Clone : Clones)
2249 }
else if (CloneOf) {
2250 OS <<
"\tClone of " << CloneOf <<
"\n";
// Debug dump of a single context edge (body elided in this listing).
2254template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2255void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// Prints an edge's context ids in sorted (deterministic) order.
2261template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2262void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2266 OS <<
" ContextIds:";
2267 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2268 std::sort(SortedIds.begin(), SortedIds.end());
2269 for (
auto Id : SortedIds)
// Debug dump of the whole graph (body elided in this listing).
2273template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2274void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// Prints every live node of the graph using the GraphTraits node iterator,
// skipping nodes already removed.
2278template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2279void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2281 OS <<
"Callsite Context Graph:\n";
2282 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2283 for (
const auto Node : nodes<GraphType>(
this)) {
2284 if (
Node->isRemoved())
// For each allocation node, prints per-context total sizes and allocation
// types, looked up in ContextIdToTotalSize / ContextIdToAllocationType.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2291template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2292void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2294 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2295 for (
const auto Node : nodes<GraphType>(
this)) {
2296 if (
Node->isRemoved())
// Only allocation nodes carry size information.
2298 if (!
Node->IsAllocation)
2301 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2302 std::sort(SortedIds.begin(), SortedIds.end());
2303 for (
auto Id : SortedIds) {
2304 auto SizeI = ContextIdToTotalSize.find(Id);
2305 assert(SizeI != ContextIdToTotalSize.end());
2306 auto TypeI = ContextIdToAllocationType.find(Id);
2307 assert(TypeI != ContextIdToAllocationType.end());
2309 <<
" with total size " << SizeI->second <<
" is "
// Verification pass: checks every node and each of its caller edges for
// internal consistency.
2315template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2316void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2317 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2318 for (
const auto Node : nodes<GraphType>(
this)) {
2319 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2320 for (
auto &Edge :
Node->CallerEdges)
2321 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
// GraphTraits specialization so generic graph utilities (nodes<>, dot export)
// can iterate over the CallsiteContextGraph's NodeOwner list.
// NOTE(review): lossy listing — most member definitions are missing here.
2325template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2327 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2328 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2330 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
2346 return G->NodeOwner.begin()->get();
2349 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2350 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2358 decltype(&GetCallee)>;
// DOTGraphTraits specialization: builds dot labels, tooltips (context id
// lists), and fill colors (by allocation type) for graph export.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2369template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2374 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Node label: original stack/alloc id, plus the call's label if present.
2380 std::string LabelString =
2381 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2382 Twine(Node->OrigStackOrAllocId))
2384 LabelString +=
"\n";
2385 if (Node->hasCall()) {
2386 auto Func =
G->NodeToCallingFunc.find(Node);
2387 assert(Func !=
G->NodeToCallingFunc.end());
2389 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2391 LabelString +=
"null call";
2392 if (Node->Recursive)
2393 LabelString +=
" (recursive)";
2395 LabelString +=
" (external)";
// Node attributes: tooltip lists context ids, fill color encodes alloc type.
2402 getContextIds(Node->getContextIds()) +
"\"")
2405 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2407 if (Node->CloneOf) {
// Edge attributes mirror the node scheme.
2417 auto &Edge = *(ChildIter.getCurrent());
2418 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2419 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
2426 return Node->isRemoved();
// Context id sets with >= 100 entries are summarized by count only.
2431 std::string IdString =
"ContextIds:";
2432 if (ContextIds.
size() < 100) {
2433 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2434 std::sort(SortedIds.begin(), SortedIds.end());
2435 for (
auto Id : SortedIds)
2436 IdString += (
" " +
Twine(Id)).str();
2438 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
2443 static std::string getColor(uint8_t AllocTypes) {
2452 return "mediumorchid1";
// Unique dot node id from the node's address.
2456 static std::string getNodeId(NodeRef
Node) {
2457 std::stringstream SStream;
2458 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
2459 std::string
Result = SStream.str();
// Writes the graph to a dot file; Label distinguishes the export stage
// (body elided in this listing).
2464template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2465void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
2466 std::string Label)
const {
// Creates a fresh clone of the edge's callee node (registered with addClone
// and NodeToCallingFunc) and delegates the edge transfer to
// moveEdgeToExistingCalleeClone with NewClone=true.
2471template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2472typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
2473CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
2474 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
2476 ContextNode *
Node = Edge->Callee;
2477 NodeOwner.push_back(
2478 std::make_unique<ContextNode>(
Node->IsAllocation,
Node->Call));
2479 ContextNode *Clone = NodeOwner.back().get();
2480 Node->addClone(Clone);
// The clone belongs to the same function as the original node.
2482 NodeToCallingFunc[Clone] = NodeToCallingFunc[
Node];
2483 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true,
// Moves (all or a subset of) an edge's context ids from the old callee to
// NewCallee, merging with any existing edge from the same caller, splitting
// the edge when only some ids move, and propagating the moved ids down the
// old callee's callee edges so the subgraph stays consistent.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2488template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2489void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2490 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
2491 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
2496 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
2498 ContextNode *OldCallee = Edge->Callee;
2502 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
// Default: move the entire edge's context id set.
2506 if (ContextIdsToMove.
empty())
2507 ContextIdsToMove = Edge->getContextIds();
// Case 1: all ids move — the whole edge is transferred (or merged).
2511 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
2514 *CallerEdgeI = OldCallee->CallerEdges.
erase(*CallerEdgeI);
2516 OldCallee->eraseCallerEdge(Edge.get());
2517 if (ExistingEdgeToNewCallee) {
2520 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2521 ContextIdsToMove.
end());
2522 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
2523 assert(Edge->ContextIds == ContextIdsToMove);
2524 Edge->ContextIds.clear();
2526 Edge->Caller->eraseCalleeEdge(Edge.get());
2529 Edge->Callee = NewCallee;
2530 NewCallee->CallerEdges.push_back(Edge);
2535 NewCallee->AllocTypes |= Edge->AllocTypes;
// Case 2: partial move — split the ids onto a (new or merged) edge.
2541 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
2542 if (ExistingEdgeToNewCallee) {
2545 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
2546 ContextIdsToMove.
end());
2547 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
2550 auto NewEdge = std::make_shared<ContextEdge>(
2551 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
2552 Edge->Caller->CalleeEdges.push_back(NewEdge);
2553 NewCallee->CallerEdges.push_back(NewEdge);
2557 NewCallee->AllocTypes |= CallerEdgeAllocType;
// Recompute the remaining alloc types on the original (now smaller) edge.
2559 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
// Propagate the moved ids down through the old callee's callee edges.
2564 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
2569 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
2570 OldCalleeEdge->AllocTypes =
2571 computeAllocType(OldCalleeEdge->getContextIds());
2578 if (
auto *NewCalleeEdge =
2579 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
2580 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
2581 EdgeContextIdsToMove.
end());
2582 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
2586 auto NewEdge = std::make_shared<ContextEdge>(
2587 OldCalleeEdge->Callee, NewCallee,
2588 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
2589 NewCallee->CalleeEdges.push_back(NewEdge);
2590 NewEdge->Callee->CallerEdges.push_back(NewEdge);
2594 OldCallee->AllocTypes = OldCallee->computeAllocType();
2597 OldCallee->emptyContextIds());
// Sanity-check both nodes and their callees after the move.
2599 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
2600 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
2601 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
2602 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
2604 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
2605 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
// Removes None-type callee edges from this node, its clones, and
// (recursively) its callers, iterating over a copy of CallerEdges since the
// recursion may mutate the list; edges with null endpoints were already
// removed elsewhere and are skipped.
2610template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2611void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
2612 recursivelyRemoveNoneTypeCalleeEdges(
2618 removeNoneTypeCalleeEdges(
Node);
2620 for (
auto *Clone :
Node->Clones)
2621 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate over a copy — recursion can remove edges from the live list.
2625 auto CallerEdges =
Node->CallerEdges;
2626 for (
auto &Edge : CallerEdges) {
2628 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2632 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
// Entry point: runs per-allocation clone identification over every
// allocation node, then strips leftover None-type callee edges.
2636template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2637void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
2639 for (
auto &Entry : AllocationCallToContextNodeMap) {
2641 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
2644 for (
auto &Entry : AllocationCallToContextNodeMap)
2645 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// Per-node cloning: first recurses up the caller edges, then, sorting caller
// edges by AllocTypeCloningPriority, splits this node into clones so each
// clone's allocation-type behavior matches a subset of caller contexts,
// reusing an existing compatible clone when one is found.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2658template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2659void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2663 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2671 if (!
Node->hasCall())
// Recurse to callers first (copy of CallerEdges: recursion may mutate it).
2686 auto CallerEdges =
Node->CallerEdges;
2687 for (
auto &Edge : CallerEdges) {
2689 if (Edge->Callee ==
nullptr && Edge->Caller ==
nullptr) {
2694 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
2695 identifyClones(Edge->Caller, Visited, AllocContextIds);
// Stable-sort caller edges so higher-priority alloc types are handled first,
// tie-broken by smallest context id for determinism.
2718 const unsigned AllocTypeCloningPriority[] = { 3, 4,
2721 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
2722 [&](
const std::shared_ptr<ContextEdge> &
A,
2723 const std::shared_ptr<ContextEdge> &
B) {
2726 if (A->ContextIds.empty())
2732 if (B->ContextIds.empty())
2735 if (A->AllocTypes == B->AllocTypes)
2738 return *A->ContextIds.begin() < *B->ContextIds.begin();
2739 return AllocTypeCloningPriority[A->AllocTypes] <
2740 AllocTypeCloningPriority[B->AllocTypes];
2750 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
2751 auto CallerEdge = *EI;
2760 auto CallerEdgeContextsForAlloc =
2762 if (CallerEdgeContextsForAlloc.empty()) {
2766 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Per-callee-edge alloc types restricted to this caller's contexts.
2770 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
2771 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
2772 for (
auto &CalleeEdge :
Node->CalleeEdges)
2773 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2774 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// If this caller already behaves like the node, no clone is needed.
2789 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
2790 allocTypeToUse(
Node->AllocTypes) &&
2791 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2792 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Otherwise, reuse a compatible existing clone or make a new one.
2799 ContextNode *Clone =
nullptr;
2800 for (
auto *CurClone :
Node->Clones) {
2801 if (allocTypeToUse(CurClone->AllocTypes) !=
2802 allocTypeToUse(CallerAllocTypeForAlloc))
2805 if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2806 CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
2814 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI,
false,
2815 CallerEdgeContextsForAlloc);
2818 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
2833 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// IR flavor: marks the allocation call with a "memprof" function attribute
// carrying the chosen allocation type, and emits an optimization remark.
2836void ModuleCallsiteContextGraph::updateAllocationCall(
2840 "memprof", AllocTypeString);
2841 cast<CallBase>(
Call.call())->addFnAttr(
A);
2842 OREGetter(
Call.call()->getFunction())
2844 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
2846 <<
" marked with memprof allocation attribute "
2847 <<
ore::NV(
"Attribute", AllocTypeString));
// Summary flavor: records the allocation type in the AllocInfo Versions
// slot for this clone number (most of the body elided in this listing).
2850void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
2854 assert(AI->Versions.size() >
Call.cloneNo());
// IR flavor: redirects the caller's call to the assigned function clone
// (clone 0 is the original, so no rewrite needed) and emits a remark.
2858void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2859 FuncInfo CalleeFunc) {
2860 if (CalleeFunc.cloneNo() > 0)
2861 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
2862 OREGetter(CallerCall.call()->getFunction())
2864 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
2865 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
2866 <<
" assigned to call function clone "
2867 <<
ore::NV(
"Callee", CalleeFunc.func()));
// Summary flavor: records which callee function clone this caller clone
// should call, in the CallsiteInfo Clones vector.
2870void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
2871 FuncInfo CalleeFunc) {
2872 auto *CI = CallerCall.call().dyn_cast<
CallsiteInfo *>();
2874 "Caller cannot be an allocation which should not have profiled calls");
2875 assert(CI->Clones.size() > CallerCall.cloneNo());
2876 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
// IR flavor: clones the function for clone number CloneNo, maps every
// metadata-bearing call through VMap into CallMap so later assignment can
// find the cloned instructions, and emits a remark.
2879CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
2881ModuleCallsiteContextGraph::cloneFunctionForCallsite(
2882 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2883 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2889 NewFunc->setName(
Name);
2890 for (
auto &Inst : CallsWithMetadataInFunc) {
// Only originals (clone 0) are cloned from.
2892 assert(Inst.cloneNo() == 0);
2893 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
2895 OREGetter(
Func.func())
2897 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
2898 return {NewFunc, CloneNo};
// Summary flavor: no IR exists to clone — instead extends each record's
// per-clone vector (AllocInfo::Versions or CallsiteInfo::Clones) with a 0
// placeholder for the new clone number and maps the call to itself.
2902 IndexCall>::FuncInfo
2903IndexCallsiteContextGraph::cloneFunctionForCallsite(
2904 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2905 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
2920 for (
auto &Inst : CallsWithMetadataInFunc) {
2922 assert(Inst.cloneNo() == 0);
2923 if (
auto *AI = Inst.call().dyn_cast<
AllocInfo *>()) {
2924 assert(AI->Versions.size() == CloneNo);
2927 AI->Versions.push_back(0);
2930 assert(CI && CI->Clones.size() == CloneNo);
2933 CI->Clones.push_back(0);
2935 CallMap[Inst] = {Inst.call(), CloneNo};
2937 return {
Func.func(), CloneNo};
// Assigns each callsite/allocation node clone to a function clone: walks each
// function's metadata-bearing calls, creates function clones on demand when a
// node has more clones than the function has, keeps caller->callee clone
// assignments consistent (CallsiteToCalleeFuncCloneMap), and finally rewrites
// every call via updateAllocationCall/updateCall. Returns whether anything
// changed.
// NOTE(review): lossy listing — some plain lines are missing from this view.
2971template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2972bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
2973 bool Changed =
false;
// Records which function clone each caller node's call is assigned to.
2981 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
2982 const FuncInfo &CalleeFunc) {
2984 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
2989 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2990 FuncInfo OrigFunc(Func);
2994 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
2995 for (
auto &Call : CallsWithMetadata) {
2996 ContextNode *
Node = getNodeForInst(Call);
3003 "Not having a call should have prevented cloning");
3007 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
// Binds a callsite clone to a function clone and points it at the cloned
// call instruction recorded in that function clone's CallMap.
3011 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
3013 ContextNode *CallsiteClone,
3016 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3018 assert(FuncClonesToCallMap.count(FuncClone));
3019 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3021 if (CallMap.count(Call))
3022 CallClone = CallMap[
Call];
3023 CallsiteClone->setCall(CallClone);
// Process the node and all its clones as a worklist (it can grow below).
3029 std::deque<ContextNode *> ClonesWorklist;
3031 if (!
Node->emptyContextIds())
3032 ClonesWorklist.push_back(
Node);
3033 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
3034 Node->Clones.end());
3039 unsigned NodeCloneCount = 0;
3040 while (!ClonesWorklist.empty()) {
3041 ContextNode *Clone = ClonesWorklist.front();
3042 ClonesWorklist.pop_front();
3045 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
// Need a new function clone when there are more node clones than
// function clones so far.
3051 if (FuncClonesToCallMap.size() < NodeCloneCount) {
// First node clone uses the original function.
3053 if (NodeCloneCount == 1) {
3058 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3059 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3063 FuncClonesToCallMap[OrigFunc] = {};
3064 AssignCallsiteCloneToFuncClone(
3065 OrigFunc, Call, Clone,
3066 AllocationCallToContextNodeMap.count(Call));
3067 for (
auto &CE : Clone->CallerEdges) {
3069 if (!
CE->Caller->hasCall())
3071 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
// Subsequent clones require cloning the function itself.
3081 FuncInfo PreviousAssignedFuncClone;
3083 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3084 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3086 bool CallerAssignedToCloneOfFunc =
false;
3087 if (EI != Clone->CallerEdges.end()) {
3088 const std::shared_ptr<ContextEdge> &Edge = *EI;
3089 PreviousAssignedFuncClone =
3090 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3091 CallerAssignedToCloneOfFunc =
true;
3096 std::map<CallInfo, CallInfo> NewCallMap;
3097 unsigned CloneNo = FuncClonesToCallMap.size();
3098 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3099 "should already exist in the map");
3100 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3101 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3102 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3103 FunctionClonesAnalysis++;
// If no caller was already pinned to a function clone, assign this
// node clone (and its callers) to the new function clone directly.
3109 if (!CallerAssignedToCloneOfFunc) {
3110 AssignCallsiteCloneToFuncClone(
3111 NewFuncClone, Call, Clone,
3112 AllocationCallToContextNodeMap.count(Call));
3113 for (
auto &CE : Clone->CallerEdges) {
3115 if (!
CE->Caller->hasCall())
3117 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// Otherwise, migrate callers previously assigned to the old func
// clone over to the new one, cloning their other callees to match.
3126 for (
auto CE : Clone->CallerEdges) {
3131 if (!
CE->Caller->hasCall())
3134 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3138 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3139 PreviousAssignedFuncClone)
3142 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3152 for (
auto CalleeEdge :
CE->Caller->CalleeEdges) {
3157 ContextNode *
Callee = CalleeEdge->Callee;
3161 if (Callee == Clone || !
Callee->hasCall())
3163 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3164 removeNoneTypeCalleeEdges(NewClone);
3167 removeNoneTypeCalleeEdges(Callee);
3172 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3173 RecordCalleeFuncOfCallsite(
3174 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
// Point the moved callee clone at its call in the new func clone.
3183 OrigCall.setCloneNo(0);
3184 std::map<CallInfo, CallInfo> &CallMap =
3185 FuncClonesToCallMap[NewFuncClone];
3186 assert(CallMap.count(OrigCall));
3187 CallInfo NewCall(CallMap[OrigCall]);
3189 NewClone->setCall(NewCall);
// Reconcile each caller edge with the func clone its caller calls,
// splitting this node clone further when callers disagree.
3211 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3212 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3215 for (
auto EI = Clone->CallerEdges.begin();
3216 EI != Clone->CallerEdges.end();) {
3219 if (!Edge->Caller->hasCall()) {
3225 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3226 FuncInfo FuncCloneCalledByCaller =
3227 CallsiteToCalleeFuncCloneMap[Edge->Caller];
// Conflict: caller calls a different func clone than this node
// clone is (or would be) assigned to — move the edge off.
3237 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3238 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3246 (FuncCloneAssignedToCurCallsiteClone &&
3247 FuncCloneAssignedToCurCallsiteClone !=
3248 FuncCloneCalledByCaller)) {
3263 if (FuncCloneToNewCallsiteCloneMap.count(
3264 FuncCloneCalledByCaller)) {
3265 ContextNode *NewClone =
3266 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
3267 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
3269 removeNoneTypeCalleeEdges(NewClone);
3272 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
3273 removeNoneTypeCalleeEdges(NewClone);
3274 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
// New callsite clones are processed by the same worklist loop.
3277 ClonesWorklist.push_back(NewClone);
3278 assert(EI == Clone->CallerEdges.end() ||
3284 removeNoneTypeCalleeEdges(Clone);
3293 if (!FuncCloneAssignedToCurCallsiteClone) {
3294 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
3296 AssignCallsiteCloneToFuncClone(
3297 FuncCloneCalledByCaller, Call, Clone,
3298 AllocationCallToContextNodeMap.count(Call));
3302 assert(FuncCloneAssignedToCurCallsiteClone ==
3303 FuncCloneCalledByCaller);
// No caller dictated an assignment — pick any func clone not yet
// holding a clone of this node.
3312 if (!FuncCloneAssignedToCurCallsiteClone) {
3317 for (
auto &CF : FuncClonesToCallMap) {
3318 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
3319 FuncCloneAssignedToCurCallsiteClone = CF.first;
3323 assert(FuncCloneAssignedToCurCallsiteClone);
3325 AssignCallsiteCloneToFuncClone(
3326 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
3327 AllocationCallToContextNodeMap.count(Call));
3329 assert(FuncCloneToCurNodeCloneMap
3330 [FuncCloneAssignedToCurCallsiteClone] == Clone);
3332 RecordCalleeFuncOfCallsite(Edge->Caller,
3333 FuncCloneAssignedToCurCallsiteClone);
// Post-condition checks over the node, its clones, and neighbors.
3340 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
3341 for (
const auto &PE :
Node->CalleeEdges)
3342 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3343 for (
const auto &CE :
Node->CallerEdges)
3344 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
3345 for (
auto *Clone :
Node->Clones) {
3346 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3347 for (
const auto &PE : Clone->CalleeEdges)
3348 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
3349 for (
const auto &CE : Clone->CallerEdges)
3350 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
// Recursive lambda: rewrites calls bottom-up from each allocation node,
// visiting clones and callers before updating this node's own call.
3356 auto UpdateCalls = [&](ContextNode *
Node,
3358 auto &&UpdateCalls) {
3363 for (
auto *Clone :
Node->Clones)
3364 UpdateCalls(Clone, Visited, UpdateCalls);
3366 for (
auto &Edge :
Node->CallerEdges)
3367 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
3371 if (!
Node->hasCall() ||
Node->emptyContextIds())
3374 if (
Node->IsAllocation) {
3375 updateAllocationCall(
Node->Call, allocTypeToUse(
Node->AllocTypes));
3379 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
3382 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
3383 updateCall(
Node->Call, CalleeFunc);
3392 for (
auto &Entry : AllocationCallToContextNodeMap)
3393 UpdateCalls(
Entry.second, Visited, UpdateCalls);
// Fragment: presumably the ThinLTO-backend function-cloning helper — the
// signature was dropped by the extraction (TODO confirm). Visible logic:
// creates NumClones-1 clones with fresh ValueToValueMapTys, strips memprof/
// callsite metadata from each clone, resolves name clashes with preexisting
// declarations, and mirrors any aliases of the original onto each clone.
3407 FunctionsClonedThinBackend++;
3408 for (
unsigned I = 1;
I < NumClones;
I++) {
3409 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
3411 FunctionClonesThinBackend++;
// Clones carry no memprof metadata of their own.
3414 for (
auto &BB : *NewF) {
3415 for (
auto &Inst : BB) {
3416 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
3417 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
// If a declaration with the target name already exists, take it over.
3421 auto *PrevF = M.getFunction(
Name);
3425 assert(PrevF->isDeclaration());
3426 NewF->takeName(PrevF);
3427 PrevF->replaceAllUsesWith(NewF);
3428 PrevF->eraseFromParent();
3430 NewF->setName(
Name);
3432 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
// Create matching clones for any aliases pointing at the original.
3435 if (!FuncToAliasMap.count(&
F))
3437 for (
auto *
A : FuncToAliasMap[&
F]) {
3439 auto *PrevA = M.getNamedAlias(
Name);
3441 A->getType()->getPointerAddressSpace(),
3442 A->getLinkage(),
Name, NewF);
3443 NewA->copyAttributesFrom(
A);
3447 assert(PrevA->isDeclaration());
3448 NewA->takeName(PrevA);
3449 PrevA->replaceAllUsesWith(NewA);
3450 PrevA->eraseFromParent();
3492bool MemProfContextDisambiguation::applyImport(
Module &M) {
3494 bool Changed =
false;
3496 auto IsMemProfClone = [](
const Function &
F) {
3503 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
3505 for (
auto &
A :
M.aliases()) {
3506 auto *Aliasee =
A.getAliaseeObject();
3507 if (
auto *
F = dyn_cast<Function>(Aliasee))
3508 FuncToAliasMap[
F].insert(&
A);
3512 if (
F.isDeclaration() || IsMemProfClone(
F))
3518 bool ClonesCreated =
false;
3519 unsigned NumClonesCreated = 0;
3520 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
3530 if (ClonesCreated) {
3531 assert(NumClonesCreated == NumClones);
3538 ClonesCreated =
true;
3539 NumClonesCreated = NumClones;
3545 CloneFuncIfNeeded(
StackNode.Clones.size());
3549 assert(!IsMemProfClone(*CalledFunction));
3554 auto CalleeOrigName = CalledFunction->getName();
3555 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
3560 auto NewF =
M.getOrInsertFunction(
3562 CalledFunction->getFunctionType());
3568 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3571 <<
ore::NV(
"Call", CBClone) <<
" in clone "
3573 <<
" assigned to call function clone "
3574 <<
ore::NV(
"Callee", NewF.getCallee()));
3592 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
3594 "enable-import-metadata is needed to emit thinlto_src_module");
3596 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
3598 if (GVS->modulePath() == SrcModule) {
3599 GVSummary = GVS.get();
3603 assert(GVSummary && GVSummary->modulePath() == SrcModule);
3608 if (isa<AliasSummary>(GVSummary))
3611 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
3613 if (
FS->allocs().empty() &&
FS->callsites().empty())
3616 auto SI =
FS->callsites().begin();
3617 auto AI =
FS->allocs().begin();
3625 for (
auto CallsiteIt =
FS->callsites().rbegin();
3626 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
3627 auto &Callsite = *CallsiteIt;
3631 if (!Callsite.StackIdIndices.empty())
3633 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
3639 for (
auto &BB :
F) {
3640 for (
auto &
I : BB) {
3641 auto *CB = dyn_cast<CallBase>(&
I);
3646 auto *CalledValue = CB->getCalledOperand();
3647 auto *CalledFunction = CB->getCalledFunction();
3648 if (CalledValue && !CalledFunction) {
3649 CalledValue = CalledValue->stripPointerCasts();
3651 CalledFunction = dyn_cast<Function>(CalledValue);
3655 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
3656 assert(!CalledFunction &&
3657 "Expected null called function in callsite for alias");
3658 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
3662 I.getMetadata(LLVMContext::MD_callsite));
3663 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
3667 if (CB->getAttributes().hasFnAttr(
"memprof")) {
3669 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3670 ? AllocTypeColdThinBackend++
3671 : AllocTypeNotColdThinBackend++;
3672 OrigAllocsThinBackend++;
3673 AllocVersionsThinBackend++;
3674 if (!MaxAllocVersionsThinBackend)
3675 MaxAllocVersionsThinBackend = 1;
3678 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3685 auto &AllocNode = *(AI++);
3689 auto MIBIter = AllocNode.MIBs.begin();
3690 for (
auto &MDOp : MemProfMD->operands()) {
3691 assert(MIBIter != AllocNode.MIBs.end());
3693 MIBIter->StackIdIndices.begin();
3694 auto *MIBMD = cast<const MDNode>(MDOp);
3698 auto ContextIterBegin =
3702 (ContextIterBegin != StackContext.
end() &&
3703 *ContextIterBegin == 0)
3706 for (
auto ContextIter = ContextIterBegin;
3707 ContextIter != StackContext.
end(); ++ContextIter) {
3711 if (LastStackContextId == *ContextIter)
3713 LastStackContextId = *ContextIter;
3714 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
3723 CloneFuncIfNeeded(AllocNode.Versions.size());
3725 OrigAllocsThinBackend++;
3726 AllocVersionsThinBackend += AllocNode.Versions.size();
3727 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
3728 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
3735 if (AllocNode.Versions.size() == 1) {
3740 UnclonableAllocsThinBackend++;
3746 return Type == ((uint8_t)AllocationType::NotCold |
3747 (uint8_t)AllocationType::Cold);
3751 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
3757 : AllocTypeNotColdThinBackend++;
3769 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3772 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
3774 <<
" marked with memprof allocation attribute "
3775 <<
ore::NV(
"Attribute", AllocTypeString));
3777 }
else if (!CallsiteContext.empty()) {
3779 assert(SI !=
FS->callsites().end());
3785 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
3786 for (
auto StackId : CallsiteContext) {
3794 CloneCallsite(
StackNode, CB, CalledFunction);
3795 }
else if (CB->isTailCall()) {
3800 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
3801 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
3802 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
3803 CloneCallsite(Callsite->second, CB, CalledFunction);
3807 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
3808 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
3816template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3817bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
3819 dbgs() <<
"CCG before cloning:\n";
3823 exportToDot(
"postbuild");
3836 dbgs() <<
"CCG after cloning:\n";
3840 exportToDot(
"cloned");
3842 bool Changed = assignFunctions();
3845 dbgs() <<
"CCG after assigning function clones:\n";
3849 exportToDot(
"clonefuncassign");
3852 printTotalSizes(
errs());
3857bool MemProfContextDisambiguation::processModule(
3864 return applyImport(M);
3877 ModuleCallsiteContextGraph CCG(M, OREGetter);
3878 return CCG.process();
3883 : ImportSummary(Summary) {
3884 if (ImportSummary) {
3894 auto ReadSummaryFile =
3896 if (!ReadSummaryFile) {
3903 if (!ImportSummaryForTestingOrErr) {
3909 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
3910 ImportSummary = ImportSummaryForTesting.get();
3919 if (!processModule(M, OREGetter))
3936 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_ATTRIBUTE_UNUSED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
Module.h This file contains the declarations for the Module class.
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
std::pair< KeyT, ValueT > & back()
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
StringRef AttributeString(unsigned Attribute)
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
uint64_t getMIBTotalSize(const MDNode *MIB)
Returns the total size from an MIB metadata node, or 0 if it was not recorded.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const