49#include <unordered_map>
54#define DEBUG_TYPE "memprof-context-disambiguation"
57 "Number of function clones created during whole program analysis");
59 "Number of function clones created during ThinLTO backend");
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
67 "Number of not cold static allocations (possibly cloned) during "
69STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
81 "Number of unclonable ambigous allocations during ThinLTO backend");
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
85 "Number of profiled callees found via tail calls");
87 "Aggregate depth of profiled callees found via tail calls");
89 "Maximum depth of profiled callees found via tail calls");
91 "Number of profiled callees found via multiple tail call chains");
96 cl::desc(
"Specify the path prefix of the MemProf dot files."));
100 cl::desc(
"Export graph to dot files."));
104 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
108 cl::desc(
"Perform verification checks on CallingContextGraph."));
112 cl::desc(
"Perform frequent verification checks on nodes."));
115 "memprof-import-summary",
116 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
122 cl::desc(
"Max depth to recursively search for missing "
123 "frames through tail calls."));
128 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
137 cl::desc(
"Allow cloning of contexts through recursive cycles"));
148 cl::desc(
"Linking with hot/cold operator new interfaces"));
153 "Require target function definition when promoting indirect calls"));
174template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
175class CallsiteContextGraph {
177 CallsiteContextGraph() =
default;
178 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
179 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
186 void identifyClones();
193 bool assignFunctions();
200 const CallsiteContextGraph &CCG) {
206 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
208 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
210 void exportToDot(std::string Label)
const;
213 struct FuncInfo final
214 :
public std::pair<FuncTy *, unsigned > {
215 using Base = std::pair<FuncTy *, unsigned>;
216 FuncInfo(
const Base &
B) :
Base(
B) {}
217 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
218 explicit operator bool()
const {
return this->first !=
nullptr; }
219 FuncTy *
func()
const {
return this->first; }
220 unsigned cloneNo()
const {
return this->second; }
224 struct CallInfo final :
public std::pair<CallTy, unsigned > {
225 using Base = std::pair<CallTy, unsigned>;
227 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
229 explicit operator bool()
const {
return (
bool)this->first; }
230 CallTy call()
const {
return this->first; }
231 unsigned cloneNo()
const {
return this->second; }
232 void setCloneNo(
unsigned N) { this->second =
N; }
234 if (!
operator bool()) {
240 OS <<
"\t(clone " << cloneNo() <<
")";
263 bool Recursive =
false;
290 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
294 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
298 const std::vector<std::shared_ptr<ContextEdge>> *
299 getEdgesWithAllocInfo()
const {
302 if (!CalleeEdges.empty())
304 if (!CallerEdges.empty()) {
317 auto *Edges = getEdgesWithAllocInfo();
321 for (
auto &Edge : *Edges)
322 Count += Edge->getContextIds().size();
324 for (
auto &Edge : *Edges)
325 ContextIds.
insert(Edge->getContextIds().begin(),
326 Edge->getContextIds().end());
332 uint8_t computeAllocType()
const {
333 auto *Edges = getEdgesWithAllocInfo();
335 return (
uint8_t)AllocationType::None;
339 for (
auto &Edge : *Edges) {
350 bool emptyContextIds()
const {
351 auto *Edges = getEdgesWithAllocInfo();
354 for (
auto &Edge : *Edges) {
355 if (!Edge->getContextIds().empty())
362 std::vector<ContextNode *> Clones;
365 ContextNode *CloneOf =
nullptr;
367 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
369 ContextNode(
bool IsAllocation,
CallInfo C)
370 : IsAllocation(IsAllocation),
Call(
C) {}
372 void addClone(ContextNode *Clone) {
374 CloneOf->Clones.push_back(Clone);
375 Clone->CloneOf = CloneOf;
377 Clones.push_back(Clone);
379 Clone->CloneOf =
this;
383 ContextNode *getOrigNode() {
390 unsigned int ContextId);
392 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
393 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
394 void eraseCalleeEdge(
const ContextEdge *Edge);
395 void eraseCallerEdge(
const ContextEdge *Edge);
399 bool hasCall()
const {
return (
bool)
Call.call(); }
405 bool isRemoved()
const {
408 return AllocTypes == (
uint8_t)AllocationType::None;
436 ContextIds(
std::
move(ContextIds)) {}
442 inline void clear() {
444 AllocTypes = (
uint8_t)AllocationType::None;
452 inline bool isRemoved()
const {
453 if (Callee || Caller)
474 void removeNoneTypeCalleeEdges(ContextNode *
Node);
475 void removeNoneTypeCallerEdges(ContextNode *
Node);
477 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
483 template <
class NodeT,
class IteratorT>
484 std::vector<uint64_t>
489 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
492 template <
class NodeT,
class IteratorT>
493 void addStackNodesForMIB(ContextNode *AllocNode,
502 void updateStackNodes();
506 void handleCallsitesWithMultipleTargets();
512 bool partitionCallsByCallee(
514 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
521 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
524 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
529 struct CallContextInfo {
533 std::vector<uint64_t> StackIds;
547 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
548 bool CalleeIter =
true);
556 void assignStackNodesPostOrder(
569 void propagateDuplicateContextIds(
575 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
583 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
593 calleesMatch(CallTy Call, EdgeIter &EI,
598 const FuncTy *getCalleeFunc(CallTy Call) {
599 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(Call);
605 bool calleeMatchesFunc(
606 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
607 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
608 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
609 Call, Func, CallerFunc, FoundCalleeChain);
613 bool sameCallee(CallTy Call1, CallTy Call2) {
614 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
619 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
620 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
625 uint64_t getLastStackId(CallTy Call) {
626 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
631 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
632 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
637 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(Call);
642 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
643 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
649 FuncInfo cloneFunctionForCallsite(
650 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
651 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
652 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
653 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
658 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
659 unsigned CloneNo)
const {
660 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
664 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
666 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
667 auto *NewNode = NodeOwner.back().get();
669 NodeToCallingFunc[NewNode] =
F;
674 ContextNode *getNodeForInst(
const CallInfo &
C);
675 ContextNode *getNodeForAlloc(
const CallInfo &
C);
676 ContextNode *getNodeForStackId(
uint64_t StackId);
699 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
700 EdgeIter *CallerEdgeI =
nullptr,
708 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
709 ContextNode *NewCallee,
710 EdgeIter *CallerEdgeI =
nullptr,
711 bool NewClone =
false,
720 void moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller);
749 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
755 unsigned int LastContextId = 0;
758template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
760 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
761template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
763 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
764template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
766 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
767template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
769 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
772class ModuleCallsiteContextGraph
773 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
776 ModuleCallsiteContextGraph(
781 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
786 bool calleeMatchesFunc(
788 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
790 bool findProfiledCalleeThroughTailCalls(
792 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
793 bool &FoundMultipleCalleeChains);
795 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
798 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
799 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
801 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
802 std::map<CallInfo, CallInfo> &CallMap,
803 std::vector<CallInfo> &CallsWithMetadataInFunc,
806 unsigned CloneNo)
const;
815struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
817 IndexCall(std::nullptr_t) : IndexCall() {}
822 IndexCall *operator->() {
return this; }
827 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
830 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
838class IndexCallsiteContextGraph
839 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
842 IndexCallsiteContextGraph(
847 ~IndexCallsiteContextGraph() {
852 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
854 for (
auto &Callsite :
I.second)
855 FS->addCallsite(*Callsite.second);
860 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
865 bool calleeMatchesFunc(
868 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
869 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
870 bool findProfiledCalleeThroughTailCalls(
872 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
873 bool &FoundMultipleCalleeChains);
874 uint64_t getLastStackId(IndexCall &Call);
875 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
878 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
881 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
882 std::map<CallInfo, CallInfo> &CallMap,
883 std::vector<CallInfo> &CallsWithMetadataInFunc,
885 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
886 unsigned CloneNo)
const;
890 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
901 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
902 FunctionCalleesToSynthesizedCallsiteInfos;
914 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
917 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
930 ((
uint8_t)AllocationType::NotCold | (
uint8_t)AllocationType::Cold))
931 return AllocationType::NotCold;
939template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
941 const std::vector<uint8_t> &InAllocTypes,
942 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
946 assert(InAllocTypes.size() == Edges.size());
948 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
950 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
954 if (l == (uint8_t)AllocationType::None ||
955 r->AllocTypes == (uint8_t)AllocationType::None)
957 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
966template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
967bool allocTypesMatchClone(
968 const std::vector<uint8_t> &InAllocTypes,
969 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
970 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
974 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
978 for (
const auto &E : Clone->CalleeEdges) {
980 EdgeCalleeMap[E->Callee] = E->AllocTypes;
984 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
985 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
987 if (Iter == EdgeCalleeMap.
end())
992 if (InAllocTypes[
I] == (
uint8_t)AllocationType::None ||
993 Iter->second == (
uint8_t)AllocationType::None)
995 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
// Returns the graph node mapped to the given call, if any: allocation calls
// are looked up first, then the map of non-allocation callsites is consulted.
// NOTE(review): this extraction is missing interior lines (embedded numbering
// jumps 1005 -> 1007 -> 1011); verify the full body against upstream.
1003template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1004typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1005CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1007 ContextNode *
Node = getNodeForAlloc(
C);
// Fall back to non-allocation callsites when no allocation node was found.
1011 return NonAllocationCallToContextNodeMap.lookup(
C);
// Returns the context node recorded for an allocation call (lookup returns a
// default/null node when the call is not in AllocationCallToContextNodeMap).
// NOTE(review): interior lines are missing in this extraction (numbering jumps
// 1016 -> 1018); confirm against upstream.
1014template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1015typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1016CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1018 return AllocationCallToContextNodeMap.lookup(
C);
// Returns the context node created for the given stack id, if one exists in
// StackEntryIdToContextNodeMap.
// NOTE(review): the parameter line and the fall-through return are missing in
// this extraction (numbering jumps 1023 -> 1025 and ends at 1027); presumably
// the function returns nullptr when the id is not found — verify upstream.
1021template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1022typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1023CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1025 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1026 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1027 return StackEntryNode->second;
// ContextNode member that records a caller relationship for a context id:
// if an edge to Caller already exists its context-id set is extended,
// otherwise a new shared ContextEdge is created and linked into both this
// node's CallerEdges and the caller's CalleeEdges.
// NOTE(review): the line declaring the member's name is missing from this
// extraction (numbering jumps 1032 -> 1034); presumably this is
// addOrUpdateCallerEdge — confirm against upstream.
1031template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1032void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1034 unsigned int ContextId) {
1035 for (
auto &Edge : CallerEdges) {
1036 if (Edge->Caller == Caller) {
// Existing edge: just add this context id to it.
1038 Edge->getContextIds().insert(ContextId);
// No existing edge: create one and register it on both endpoints.
1042 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
1044 CallerEdges.push_back(Edge);
1045 Caller->CalleeEdges.push_back(Edge);
// Removes Edge from the graph by unlinking it from both endpoints. When an
// iterator EI into one endpoint's edge vector is supplied, that side is erased
// via the iterator (so the caller's loop stays valid); CalleeIter selects
// whether EI iterates the caller's CalleeEdges or the callee's CallerEdges.
// NOTE(review): interior lines are missing in this extraction (e.g. numbering
// jumps 1051 -> 1056, 1057 -> 1065), so the branch conditions shown here are
// incomplete — verify against upstream.
1048template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1049void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1050 ContextEdge *Edge, EdgeIter *EI,
bool CalleeIter) {
1051 assert(!EI || (*EI)->get() == Edge);
1056 auto *
Callee = Edge->Callee;
1057 auto *
Caller = Edge->Caller;
// No-iterator path: erase from both endpoints' edge lists directly.
1065 Callee->eraseCallerEdge(Edge);
1066 Caller->eraseCalleeEdge(Edge);
1067 }
else if (CalleeIter) {
// EI iterates the caller's CalleeEdges; erase via iterator on that side.
1068 Callee->eraseCallerEdge(Edge);
1069 *EI =
Caller->CalleeEdges.erase(*EI);
// Otherwise EI iterates the callee's CallerEdges.
1071 Caller->eraseCalleeEdge(Edge);
1072 *EI =
Callee->CallerEdges.erase(*EI);
// Walks Node's callee edges and removes any whose AllocTypes is None (such
// edges must carry no context ids, per the assert).
// NOTE(review): this extraction is missing the else/increment arm of the loop
// (numbering ends at 1083); verify loop advancement against upstream.
1076template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1077void CallsiteContextGraph<
1078 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
1079 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1081 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1082 assert(Edge->ContextIds.empty());
// Removal updates EI so iteration can continue safely.
1083 removeEdgeFromGraph(Edge.get(), &EI,
true);
// Walks Node's caller edges and removes any whose AllocTypes is None,
// unlinking each from the caller's CalleeEdges and erasing it from this
// node's CallerEdges via the loop iterator.
// NOTE(review): the else/increment arm of the loop is missing from this
// extraction (numbering ends at 1097); verify against upstream.
1089template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1090void CallsiteContextGraph<
1091 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *
Node) {
1092 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1094 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1095 assert(Edge->ContextIds.empty());
// Unlink from the caller side first, then erase from this node's list.
1096 Edge->Caller->eraseCalleeEdge(Edge.get());
1097 EI =
Node->CallerEdges.erase(EI);
// Linear scan of this node's CalleeEdges for the edge whose Callee matches.
// NOTE(review): the return statements are missing from this extraction
// (numbering ends at 1108); presumably returns the matching edge or nullptr —
// verify against upstream.
1103template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1104typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1105CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1106 findEdgeFromCallee(
const ContextNode *Callee) {
1107 for (
const auto &Edge : CalleeEdges)
1108 if (Edge->Callee == Callee)
// Linear scan of this node's CallerEdges for the edge whose Caller matches.
// NOTE(review): the return statements are missing from this extraction
// (numbering ends at 1118); presumably returns the matching edge or nullptr —
// verify against upstream.
1113template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1114typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1115CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1116 findEdgeFromCaller(
const ContextNode *Caller) {
1117 for (
const auto &Edge : CallerEdges)
1118 if (Edge->Caller == Caller)
// Removes the given raw edge pointer from this node's CalleeEdges. The
// predicate compares the shared_ptr's raw pointer; the edge must be present
// (asserted) before erasing.
// NOTE(review): the line invoking the find algorithm is missing from this
// extraction (numbering jumps 1125 -> 1127); verify against upstream.
1123template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1124void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1125 eraseCalleeEdge(
const ContextEdge *Edge) {
1127 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1128 return CalleeEdge.get() == Edge;
1130 assert(EI != CalleeEdges.end());
1131 CalleeEdges.erase(EI);
// Removes the given raw edge pointer from this node's CallerEdges — mirror of
// eraseCalleeEdge for the caller side; the edge must be present (asserted).
// NOTE(review): the line invoking the find algorithm is missing from this
// extraction (numbering jumps 1136 -> 1138); verify against upstream.
1134template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1135void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1136 eraseCallerEdge(
const ContextEdge *Edge) {
1138 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1139 return CallerEdge.get() == Edge;
1141 assert(EI != CallerEdges.end());
1142 CallerEdges.erase(EI);
1145template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1146uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1149 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1151 for (
auto Id : ContextIds) {
1160template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1162CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1165 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1167 for (
auto Id : Node1Ids) {
1168 if (!Node2Ids.
count(Id))
// Computes the combined alloc type over the intersection of two context-id
// sets, ordering the arguments so the smaller set is iterated by the Impl
// helper (which membership-tests against its second argument).
// NOTE(review): the parameter list line is missing from this extraction
// (numbering jumps 1179 -> 1181); verify signature against upstream.
1178template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1179uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1181 if (Node1Ids.
size() < Node2Ids.
size())
1182 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1184 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1187template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1188typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1189CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1191 assert(!getNodeForAlloc(Call));
1192 ContextNode *AllocNode = createNewNode(
true,
F, Call);
1193 AllocationCallToContextNodeMap[
Call] = AllocNode;
1195 AllocNode->OrigStackOrAllocId = LastContextId;
1198 AllocNode->AllocTypes = (
uint8_t)AllocationType::None;
1207 if (AllocTypes & (
uint8_t)AllocationType::NotCold)
1209 if (AllocTypes & (
uint8_t)AllocationType::Cold)
1214template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1215template <
class NodeT,
class IteratorT>
1216void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1225 ContextIdToAllocationType[++LastContextId] =
AllocType;
1227 if (!ContextSizeInfo.
empty()) {
1228 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1238 ContextNode *PrevNode = AllocNode;
1245 ContextIter != StackContext.
end(); ++ContextIter) {
1246 auto StackId = getStackId(*ContextIter);
1247 ContextNode *
StackNode = getNodeForStackId(StackId);
1250 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1251 StackNode->OrigStackOrAllocId = StackId;
// For each context id in StackSequenceContextIds, allocates a fresh id
// (incrementing LastContextId), records the old->new mapping in
// OldToNewContextIds, and copies the old id's allocation type to the new id.
// Returns the set of newly created ids.
// NOTE(review): the return-type and parameter lines are missing from this
// extraction (numbering jumps 1266 -> 1268 -> 1272); verify upstream.
1266template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1268CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1272 for (
auto OldId : StackSequenceContextIds) {
1273 NewContextIds.
insert(++LastContextId);
1274 OldToNewContextIds[OldId].insert(LastContextId);
// Every duplicated id must already have a recorded allocation type.
1275 assert(ContextIdToAllocationType.count(OldId));
1277 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1279 return NewContextIds;
1282template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1283void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1284 propagateDuplicateContextIds(
1289 for (
auto Id : ContextIds)
1290 if (
auto NewId = OldToNewContextIds.find(Id);
1291 NewId != OldToNewContextIds.end())
1292 NewIds.
insert(NewId->second.begin(), NewId->second.end());
1297 auto UpdateCallers = [&](ContextNode *
Node,
1299 auto &&UpdateCallers) ->
void {
1300 for (
const auto &Edge :
Node->CallerEdges) {
1301 auto Inserted = Visited.insert(Edge.get());
1304 ContextNode *NextNode = Edge->Caller;
1308 if (!NewIdsToAdd.
empty()) {
1309 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1310 UpdateCallers(NextNode, Visited, UpdateCallers);
1316 for (
auto &Entry : AllocationCallToContextNodeMap) {
1318 UpdateCallers(
Node, Visited, UpdateCallers);
1322template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1323void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1324 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1329 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1331 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1337 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1338 NotFoundContextIds);
1339 RemainingContextIds.
swap(NotFoundContextIds);
1341 if (NewEdgeContextIds.
empty()) {
1345 if (TowardsCallee) {
1346 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1347 auto NewEdge = std::make_shared<ContextEdge>(
1348 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1349 NewNode->CalleeEdges.push_back(NewEdge);
1350 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1352 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1353 auto NewEdge = std::make_shared<ContextEdge>(
1354 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1355 NewNode->CallerEdges.push_back(NewEdge);
1356 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1359 if (Edge->getContextIds().empty()) {
1360 removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
1367template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1369 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1373 assert(!Edge->ContextIds.empty());
1376template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1378 bool CheckEdges =
true) {
1379 if (
Node->isRemoved())
1383 auto NodeContextIds =
Node->getContextIds();
1387 if (
Node->CallerEdges.size()) {
1389 Node->CallerEdges.front()->ContextIds);
1392 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1393 set_union(CallerEdgeContextIds, Edge->ContextIds);
1400 NodeContextIds == CallerEdgeContextIds ||
1403 if (
Node->CalleeEdges.size()) {
1405 Node->CalleeEdges.front()->ContextIds);
1408 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1409 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1411 assert(NodeContextIds == CalleeEdgeContextIds);
1420 for (
const auto &E :
Node->CalleeEdges)
1426template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1427void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1428 assignStackNodesPostOrder(
1431 &StackIdToMatchingCalls,
1440 auto CallerEdges =
Node->CallerEdges;
1441 for (
auto &Edge : CallerEdges) {
1443 if (Edge->isRemoved()) {
1447 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1448 CallToMatchingCall);
1457 if (
Node->IsAllocation ||
1458 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1461 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1465 if (Calls.size() == 1) {
1466 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1467 if (Ids.size() == 1) {
1468 assert(SavedContextIds.empty());
1471 if (
Node->Recursive)
1473 Node->setCall(Call);
1474 NonAllocationCallToContextNodeMap[
Call] =
Node;
1484 ContextNode *LastNode = getNodeForStackId(LastId);
1489 ContextNode *LastNode =
Node;
1496 bool PrevIterCreatedNode =
false;
1497 bool CreatedNode =
false;
1498 for (
unsigned I = 0;
I < Calls.size();
1499 I++, PrevIterCreatedNode = CreatedNode) {
1500 CreatedNode =
false;
1501 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1504 if (SavedContextIds.empty()) {
1509 if (!CallToMatchingCall.
contains(Call))
1511 auto MatchingCall = CallToMatchingCall[
Call];
1512 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1516 assert(
I > 0 && !PrevIterCreatedNode);
1519 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1524 assert(LastId == Ids.back());
1533 ContextNode *PrevNode = LastNode;
1537 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1539 ContextNode *CurNode = getNodeForStackId(Id);
1543 assert(!CurNode->Recursive);
1545 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1557 if (SavedContextIds.empty()) {
1566 ContextNode *NewNode = createNewNode(
false, Func, Call);
1567 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1569 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1571 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1577 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1582 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1587 for (
auto Id : Ids) {
1588 ContextNode *CurNode = getNodeForStackId(Id);
1595 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1597 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1598 if (PrevEdge->getContextIds().empty())
1599 removeEdgeFromGraph(PrevEdge);
1604 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1605 ? (
uint8_t)AllocationType::None
1606 : CurNode->computeAllocType();
1610 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1611 for (
auto Id : Ids) {
1612 ContextNode *CurNode = getNodeForStackId(Id);
1615 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
1621template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1622void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1631 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1632 for (
auto &Call : CallsWithMetadata) {
1634 if (AllocationCallToContextNodeMap.count(Call))
1636 auto StackIdsWithContextNodes =
1637 getStackIdsWithContextNodesForCall(
Call.call());
1640 if (StackIdsWithContextNodes.empty())
1644 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1645 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1660 for (
auto &It : StackIdToMatchingCalls) {
1661 auto &Calls = It.getSecond();
1663 if (Calls.size() == 1) {
1664 auto &Ids = Calls[0].StackIds;
1665 if (Ids.size() == 1)
1679 for (
const auto &[
Idx, CallCtxInfo] :
enumerate(Calls))
1680 FuncToIndex.
insert({CallCtxInfo.Func,
Idx});
1682 Calls.begin(), Calls.end(),
1683 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1684 return A.StackIds.size() > B.StackIds.size() ||
1685 (A.StackIds.size() == B.StackIds.size() &&
1686 (A.StackIds < B.StackIds ||
1687 (A.StackIds == B.StackIds &&
1688 FuncToIndex[A.Func] < FuncToIndex[B.Func])));
1695 ContextNode *LastNode = getNodeForStackId(LastId);
1699 if (LastNode->Recursive)
1715 for (
unsigned I = 0;
I < Calls.size();
I++) {
1716 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1717 assert(SavedContextIds.empty());
1718 assert(LastId == Ids.back());
1723 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1724 MatchingIdsFuncSet.
clear();
1733 ContextNode *PrevNode = LastNode;
1734 ContextNode *CurNode = LastNode;
1739 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1741 CurNode = getNodeForStackId(Id);
1745 if (CurNode->Recursive) {
1750 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1768 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1771 if (StackSequenceContextIds.
empty()) {
1784 if (Ids.back() != getLastStackId(Call)) {
1785 for (
const auto &PE : LastNode->CallerEdges) {
1786 set_subtract(StackSequenceContextIds, PE->getContextIds());
1787 if (StackSequenceContextIds.
empty())
1791 if (StackSequenceContextIds.
empty())
1803 MatchingIdsFuncSet.
insert(Func);
1810 bool DuplicateContextIds =
false;
1811 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1812 auto &CallCtxInfo = Calls[J];
1813 auto &NextIds = CallCtxInfo.StackIds;
1816 auto *NextFunc = CallCtxInfo.Func;
1817 if (NextFunc != Func) {
1820 DuplicateContextIds =
true;
1823 auto &NextCall = CallCtxInfo.Call;
1824 CallToMatchingCall[NextCall] =
Call;
1835 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1836 StackSequenceContextIds.
size());
1839 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1840 : StackSequenceContextIds;
1841 assert(!SavedContextIds.empty());
1843 if (!DuplicateContextIds) {
1847 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1848 if (LastNodeContextIds.
empty())
1855 propagateDuplicateContextIds(OldToNewContextIds);
1866 for (
auto &Entry : AllocationCallToContextNodeMap)
1867 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
1868 CallToMatchingCall);
1875 Call->getMetadata(LLVMContext::MD_callsite));
1876 return CallsiteContext.
back();
// Returns the last stack id of the callsite's context: extracts the
// CallsiteInfo from the IndexCall's pointer union and maps the context's
// final index through the module summary index's stack-id table.
// NOTE(review): interior lines are missing in this extraction (numbering
// jumps 1879 -> 1882 -> 1884); verify against upstream.
1879uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1882 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1884 return Index.getStackIdAtIndex(CallsiteContext.
back());
1901std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1903 unsigned CloneNo)
const {
1904 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1905 cast<CallBase>(Call)->getCalledFunction()->getName())
1909std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1910 const IndexCall &Call,
1911 unsigned CloneNo)
const {
1912 auto VI = FSToVIMap.find(Func);
1913 assert(VI != FSToVIMap.end());
1914 if (isa<AllocInfo *>(
Call.getBase()))
1915 return (
VI->second.name() +
" -> alloc").str();
1917 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(
Call.getBase());
1918 return (
VI->second.name() +
" -> " +
1920 Callsite->Clones[CloneNo]))
1925std::vector<uint64_t>
1926ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1929 Call->getMetadata(LLVMContext::MD_callsite));
1930 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1934std::vector<uint64_t>
1935IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1938 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(
Call.getBase()));
1944template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1945template <
class NodeT,
class IteratorT>
1946std::vector<uint64_t>
1947CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1949 std::vector<uint64_t> StackIds;
1950 for (
auto IdOrIndex : CallsiteContext) {
1951 auto StackId = getStackId(IdOrIndex);
1952 ContextNode *
Node = getNodeForStackId(StackId);
1955 StackIds.push_back(StackId);
// Construct the context graph from IR: scan every call instruction for
// !memprof / !callsite metadata, create allocation and stack nodes, then
// fix up callsites with multiple targets and strip the consumed metadata.
// NOTE(review): extraction artifact — loop headers over Module functions,
// some operand-extraction expressions, and several closing braces are
// missing relative to the upstream source.
1960ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1963 :
Mod(
M), OREGetter(OREGetter) {
1965 std::vector<CallInfo> CallsWithMetadata;
1966 for (
auto &BB :
F) {
1967 for (
auto &
I : BB) {
// Only call instructions can carry memprof/callsite metadata.
1968 if (!isa<CallBase>(
I))
1970 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1971 CallsWithMetadata.push_back(&
I);
// This call is an allocation with a memprof profile: create its node.
1972 auto *AllocNode = addAllocNode(&
I, &
F);
1973 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
// Each !memprof operand is one MIB (memory info block) describing a
// full allocation context and its allocation type.
1977 for (
auto &MDOp : MemProfMD->operands()) {
1978 auto *MIBMD = cast<const MDNode>(MDOp);
1979 std::vector<ContextTotalSize> ContextSizeInfo;
// Operands beyond the first two are optional (FullStackId, TotalSize)
// pairs recorded for size reporting.
1981 if (MIBMD->getNumOperands() > 2) {
1982 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
1983 MDNode *ContextSizePair =
1984 dyn_cast<MDNode>(MIBMD->getOperand(
I));
1986 uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
1989 uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
1992 ContextSizeInfo.push_back({FullStackId, TotalSize});
// Materialize stack-frame nodes for this MIB's context.
1998 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1999 AllocNode, StackContext, CallsiteContext,
2002 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
// Memprof and callsite metadata on allocations is fully consumed here.
2005 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2006 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2009 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
// Plain callsite (non-allocation) with context metadata.
2010 CallsWithMetadata.push_back(&
I);
2014 if (!CallsWithMetadata.empty())
2015 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2019 dbgs() <<
"CCG before updating call stack chains:\n";
2024 exportToDot(
"prestackupdate");
2028 handleCallsitesWithMultipleTargets();
// Callsite metadata is no longer needed once the graph is built.
2031 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2032 for (
auto &Call : FuncEntry.second)
2033 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// Construct the context graph from the ThinLTO module summary index:
// walk every prevailing FunctionSummary, creating allocation nodes from
// its AllocInfo records and callsite nodes from its CallsiteInfo records.
// NOTE(review): extraction artifact — the continue statements, the MIB
// stack-context setup, and several closing braces are missing here.
2036IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2041 for (
auto &
I : Index) {
2043 for (
auto &S :
VI.getSummaryList()) {
// Skip non-prevailing copies; only the prevailing summary is cloned.
2052 !isPrevailing(
VI.getGUID(), S.get()))
2054 auto *
FS = dyn_cast<FunctionSummary>(S.get());
2057 std::vector<CallInfo> CallsWithMetadata;
2058 if (!
FS->allocs().empty()) {
2059 for (
auto &AN :
FS->mutableAllocs()) {
// An alloc with no MIBs carries no context information to clone on.
2064 if (AN.MIBs.empty())
2066 IndexCall AllocCall(&AN);
2067 CallsWithMetadata.push_back(AllocCall);
2068 auto *AllocNode = addAllocNode(AllocCall, FS);
// ContextSizeInfos, when present, parallels the MIB list.
2077 AN.ContextSizeInfos.size() == AN.MIBs.size());
2079 for (
auto &MIB : AN.MIBs) {
2082 std::vector<ContextTotalSize> ContextSizeInfo;
2083 if (!AN.ContextSizeInfos.empty()) {
2084 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2085 ContextSizeInfo.push_back({FullStackId, TotalSize});
// Summaries store stack ids directly, so no callsite context is needed
// (EmptyContext) when adding the MIB's stack nodes.
2087 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2088 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2092 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
// Initialize version 0 to the type chosen for the unified alloc node.
2097 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2101 if (!
FS->callsites().empty())
2102 for (
auto &SN :
FS->mutableCallsites()) {
2103 IndexCall StackNodeCall(&SN);
2104 CallsWithMetadata.push_back(StackNodeCall);
2107 if (!CallsWithMetadata.empty())
2108 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2110 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2116 dbgs() <<
"CCG before updating call stack chains:\n";
2121 exportToDot(
"prestackupdate");
2125 handleCallsitesWithMultipleTargets();
// Fix up graph nodes whose profiled callee does not match the callee in
// the IR/summary (e.g. due to inlining or tail calls): partition the
// node's calls by actual callee, re-point the node at a matching call,
// and prune edges whose callees cannot be matched at all.
// NOTE(review): extraction artifact — several statements (edge increments,
// continue/break, node removal) are missing from this copy.
2128template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2129void CallsiteContextGraph<DerivedCCG, FuncTy,
2130 CallTy>::handleCallsitesWithMultipleTargets() {
// New (call, node) pairs created while partitioning; applied after the
// main loop so we don't mutate the map while iterating it.
2145 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2146 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
// Gather the node's primary call plus all of its matching calls.
2156 std::vector<CallInfo> AllCalls;
2157 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2158 AllCalls.push_back(
Node->Call);
2159 AllCalls.insert(AllCalls.end(),
Node->MatchingCalls.begin(),
2160 Node->MatchingCalls.end());
// If the calls invoke different callees, split them across new nodes.
2173 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
// Otherwise find the first call whose callee matches every callee edge.
2176 auto It = AllCalls.begin();
2178 for (; It != AllCalls.end(); ++It) {
2181 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2184 if (!Edge->Callee->hasCall())
2186 assert(NodeToCallingFunc.count(Edge->Callee));
// calleesMatch may synthesize tail-call chain nodes as a side effect.
2188 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
// Promote the first fully-matching call to be the node's primary call.
2197 if (
Node->Call != ThisCall) {
2198 Node->setCall(ThisCall);
2209 Node->MatchingCalls.clear();
// No call matched all callee edges: count and drop the mismatched edges.
2212 if (It == AllCalls.end()) {
2213 RemovedEdgesWithMismatchedCallees++;
// Remaining calls after the match also share the callee; keep them as
// matching calls on the (re-pointed) node.
2222 for (++It; It != AllCalls.end(); ++It) {
2226 Node->MatchingCalls.push_back(ThisCall);
// Drop stale map entries whose node lost its call or was re-pointed.
2235 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2236 return !it.second->hasCall() || it.second->Call != it.first;
// Install the nodes created during partitioning and tail-call matching.
2240 for (
auto &[Call,
Node] : NewCallToNode)
2241 NonAllocationCallToContextNodeMap[
Call] =
Node;
2245 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
2246 NonAllocationCallToContextNodeMap[
Call] =
Node;
// When a node's calls target several different callees, split the node:
// group calls by callee function, map each profiled callee edge to its
// group, and move edges to per-group nodes (reusing the original node for
// the first group and a shared "unmatched" node for edges whose callee has
// no call group).
// NOTE(review): extraction artifact — early returns, edge-iterator
// increments and some map declarations are missing from this copy.
2249template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2250bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2252 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
// One group per distinct callee: the calls targeting it and the node
// (created lazily below) that will own them.
2256 struct CallsWithSameCallee {
2257 std::vector<CallInfo> Calls;
2258 ContextNode *
Node =
nullptr;
// Bucket every call by its statically-known callee function.
2264 for (
auto ThisCall : AllCalls) {
2265 auto *
F = getCalleeFunc(
ThisCall.call());
2267 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
// Match each profiled callee edge to the group for its calling function.
2276 for (
const auto &Edge :
Node->CalleeEdges) {
2277 if (!Edge->Callee->hasCall())
2279 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2280 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2281 CalleeNodeToCallInfo[Edge->Callee] =
2282 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
// Nothing matched: no partitioning is possible.
2288 if (CalleeNodeToCallInfo.
empty())
// Lazily-created node that collects edges whose callee matched no group.
2300 ContextNode *UnmatchedCalleesNode =
nullptr;
// Tracks whether the original node has been reused for a group yet.
2302 bool UsedOrigNode =
false;
2304 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
2306 if (!Edge->Callee->hasCall()) {
2313 ContextNode *CallerNodeToUse =
nullptr;
// Edge's callee had no matching call group: route it to the shared
// unmatched-callees node.
2317 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2318 if (!UnmatchedCalleesNode)
2319 UnmatchedCalleesNode =
2320 createNewNode(
false, NodeToCallingFunc[
Node]);
2321 CallerNodeToUse = UnmatchedCalleesNode;
2325 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
// First matched group keeps the original node; later groups get a
// fresh node in the same function.
2328 if (!UsedOrigNode) {
2331 Node->MatchingCalls.clear();
2332 UsedOrigNode =
true;
2335 createNewNode(
false, NodeToCallingFunc[
Node]);
// Point the group node at its first call; the rest become MatchingCalls.
2339 Info->Node->setCall(
Info->Calls.front());
2340 Info->Node->MatchingCalls.insert(
Info->Node->MatchingCalls.end(),
2341 Info->Calls.begin() + 1,
// Record the new mapping for the caller to install after iteration.
2346 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2348 CallerNodeToUse =
Info->Node;
// Edge already belongs to the chosen node; nothing to move.
2352 if (CallerNodeToUse ==
Node) {
2357 moveCalleeEdgeToNewCaller(EI, CallerNodeToUse);
// Clean up None-type caller edges left behind by the edge moves.
2364 for (
auto &
I : CalleeNodeToCallInfo)
2365 removeNoneTypeCallerEdges(
I.second->Node);
2366 if (UnmatchedCalleesNode)
2367 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2368 removeNoneTypeCallerEdges(
Node);
// --- tail of IndexCallsiteContextGraph::getStackId (header lost in this
// extraction): summaries store an index into the index's stack-id table.
2381 return Index.getStackIdAtIndex(IdOrIndex);
// Check whether the IR/summary callee of Call matches the profiled callee
// of the edge at EI; if they differ only by a chain of tail calls, stitch
// synthesized nodes for the chain into the graph and remove the original
// mismatched edge.
// NOTE(review): extraction artifact — the initial Edge binding, a few
// early returns, and lambda/loop closers are missing from this copy.
2384template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2385bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2386 CallTy Call, EdgeIter &EI,
2389 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2390 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
// FoundCalleeChain receives the (call, function) pairs of any intervening
// tail calls discovered between CallerFunc and ProfiledCalleeFunc.
2393 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2394 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
// Direct match (no tail calls): nothing to splice in.
2399 if (FoundCalleeChain.empty())
// Helper that connects Caller -> Callee, merging into an existing edge
// when one is already present, and keeping EI stable across inserts.
2402 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
2403 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2407 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
2408 Edge->ContextIds.end());
2409 CurEdge->AllocTypes |= Edge->AllocTypes;
2414 auto NewEdge = std::make_shared<ContextEdge>(
2415 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2416 Callee->CallerEdges.push_back(NewEdge);
// Inserting into the caller's own CalleeEdges invalidates EI; restore it.
2417 if (Caller == Edge->Caller) {
2421 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2424 "Iterator position not restored after insert and increment");
2426 Caller->CalleeEdges.push_back(NewEdge);
// Walk the discovered chain bottom-up, creating (or reusing) a node per
// tail call and linking each to the node below it.
2431 auto *CurCalleeNode = Edge->Callee;
2432 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2433 ContextNode *NewNode =
nullptr;
2435 if (TailCallToContextNodeMap.
count(NewCall)) {
// Chain node already synthesized by a previous match; widen its types.
2436 NewNode = TailCallToContextNodeMap[NewCall];
2437 NewNode->AllocTypes |= Edge->AllocTypes;
2439 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2441 NewNode = createNewNode(
false, Func, NewCall);
2442 TailCallToContextNodeMap[NewCall] = NewNode;
2443 NewNode->AllocTypes = Edge->AllocTypes;
2447 AddEdge(NewNode, CurCalleeNode);
2449 CurCalleeNode = NewNode;
// Finally connect the original caller to the top of the chain and drop
// the now-redundant direct edge.
2453 AddEdge(Edge->Caller, CurCalleeNode);
2457 auto *
Caller = Edge->Caller;
2461 removeEdgeFromGraph(Edge.get(), &EI,
true);
// --- IR-based callee matching helpers ----------------------------------
// findProfiledCalleeThroughTailCalls: DFS (bounded by the depth checked by
// the caller against TailCallSearchDepth) through tail calls in CurCallee
// looking for ProfiledCallee. Returns true only for a unique chain;
// multiple chains set FoundMultipleCalleeChains. Updates the statistics
// FoundProfiledCallee{Count,Depth,MaxDepth}.
// NOTE(review): extraction artifact — the SaveCallsiteInfo lambda
// definition, continue/return statements, and depth-limit check are
// missing from this copy.
2473bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2475 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2476 bool &FoundMultipleCalleeChains) {
2483 FoundCalleeChain.push_back({Callsite,
F});
// Resolve CurCallee to a Function, looking through a global alias.
2486 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
2488 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2490 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2498 bool FoundSingleCalleeChain =
false;
// Scan every tail call inside the candidate callee.
2499 for (
auto &BB : *CalleeFunc) {
2500 for (
auto &
I : BB) {
2501 auto *CB = dyn_cast<CallBase>(&
I);
2502 if (!CB || !CB->isTailCall())
2504 auto *CalledValue = CB->getCalledOperand();
2505 auto *CalledFunction = CB->getCalledFunction();
// Look through casts on the called operand to find the real function.
2506 if (CalledValue && !CalledFunction) {
2507 CalledValue = CalledValue->stripPointerCasts();
2509 CalledFunction = dyn_cast<Function>(CalledValue);
// Also look through aliases to their aliasee object.
2513 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2514 assert(!CalledFunction &&
2515 "Expected null called function in callsite for alias");
2516 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2518 if (!CalledFunction)
// Direct hit: this tail call targets the profiled callee.
2520 if (CalledFunction == ProfiledCallee) {
2521 if (FoundSingleCalleeChain) {
2522 FoundMultipleCalleeChains =
true;
2525 FoundSingleCalleeChain =
true;
2526 FoundProfiledCalleeCount++;
2527 FoundProfiledCalleeDepth +=
Depth;
2528 if (
Depth > FoundProfiledCalleeMaxDepth)
2529 FoundProfiledCalleeMaxDepth =
Depth;
2530 SaveCallsiteInfo(&
I, CalleeFunc);
2531 }
// Recursive case: the profiled callee may be further down the chain.
else if (findProfiledCalleeThroughTailCalls(
2532 ProfiledCallee, CalledFunction,
Depth + 1,
2533 FoundCalleeChain, FoundMultipleCalleeChains)) {
2536 assert(!FoundMultipleCalleeChains);
2537 if (FoundSingleCalleeChain) {
2538 FoundMultipleCalleeChains =
true;
2541 FoundSingleCalleeChain =
true;
2542 SaveCallsiteInfo(&
I, CalleeFunc);
2543 }
else if (FoundMultipleCalleeChains)
2548 return FoundSingleCalleeChain;
// getCalleeFunc (header dropped by the extraction): resolve the direct
// callee of an IR call, looking through casts and one alias level;
// indirect calls yield null.
2552 auto *CB = dyn_cast<CallBase>(Call);
2553 if (!CB->getCalledOperand() || CB->isIndirectCall())
2555 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2556 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2558 return dyn_cast<Function>(Alias->getAliasee());
2559 return dyn_cast<Function>(CalleeVal);
// calleeMatchesFunc: true if Call's static callee is Func (directly or
// via alias); otherwise try to explain the mismatch via tail calls and
// emit a debug message when no unique chain exists.
2562bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2564 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2565 auto *CB = dyn_cast<CallBase>(Call);
2566 if (!CB->getCalledOperand() || CB->isIndirectCall())
2568 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2569 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2570 if (CalleeFunc == Func)
2572 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2573 if (Alias && Alias->getAliasee() == Func)
2584 bool FoundMultipleCalleeChains =
false;
2585 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2587 FoundMultipleCalleeChains)) {
2588 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2589 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2590 <<
" that actually called " << CalleeVal->getName()
2591 << (FoundMultipleCalleeChains
2592 ?
" (found multiple possible chains)"
2595 if (FoundMultipleCalleeChains)
2596 FoundProfiledCalleeNonUniquelyCount++;
// sameCallee: do two IR calls statically target the same function?
// Indirect calls never compare equal (handled by the early bail-outs).
2603bool ModuleCallsiteContextGraph::sameCallee(
Instruction *Call1,
2605 auto *CB1 = cast<CallBase>(Call1);
2606 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2608 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2609 auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
2610 auto *CB2 = cast<CallBase>(Call2);
2611 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2613 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2614 auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
2615 return CalleeFunc1 == CalleeFunc2;
// --- Summary-index callee matching helpers -----------------------------
// Index analogue of the module tail-call search: walks FunctionSummary
// call edges marked hasTailCall(), synthesizing CallsiteInfo records
// (cached in FunctionCalleesToSynthesizedCallsiteInfos) for chain links.
// NOTE(review): extraction artifact — the CreateAndSaveCallsiteInfo lambda
// header, depth-limit check, loop headers over summary lists, and several
// continue/return statements are missing from this copy.
2618bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2620 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2621 bool &FoundMultipleCalleeChains) {
// Synthesized CallsiteInfos are cached per (function summary, callee) so
// repeated searches reuse the same record.
2630 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2631 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2634 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2637 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2638 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2645 bool FoundSingleCalleeChain =
false;
// Only prevailing summaries participate in the search.
2648 !isPrevailing(CurCallee.
getGUID(), S.get()))
2650 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
// Track the ValueInfo for FS, looking through an alias summary.
2653 auto FSVI = CurCallee;
2654 auto *AS = dyn_cast<AliasSummary>(S.get());
2656 FSVI = AS->getAliaseeVI();
2657 for (
auto &CallEdge :
FS->calls()) {
2658 if (!CallEdge.second.hasTailCall())
// Direct hit on the profiled callee via a tail call.
2660 if (CallEdge.first == ProfiledCallee) {
2661 if (FoundSingleCalleeChain) {
2662 FoundMultipleCalleeChains =
true;
2665 FoundSingleCalleeChain =
true;
2666 FoundProfiledCalleeCount++;
2667 FoundProfiledCalleeDepth +=
Depth;
2668 if (
Depth > FoundProfiledCalleeMaxDepth)
2669 FoundProfiledCalleeMaxDepth =
Depth;
2670 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
// Record the FS -> ValueInfo mapping for later label/lookup use.
2672 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2673 FSToVIMap[
FS] = FSVI;
2674 }
// Recurse one level deeper down the tail-call chain.
else if (findProfiledCalleeThroughTailCalls(
2675 ProfiledCallee, CallEdge.first,
Depth + 1,
2676 FoundCalleeChain, FoundMultipleCalleeChains)) {
2679 assert(!FoundMultipleCalleeChains);
2680 if (FoundSingleCalleeChain) {
2681 FoundMultipleCalleeChains =
true;
2684 FoundSingleCalleeChain =
true;
2685 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2687 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2688 FSToVIMap[
FS] = FSVI;
2689 }
else if (FoundMultipleCalleeChains)
2694 return FoundSingleCalleeChain;
// getCalleeFunc (index flavor): resolve a CallsiteInfo's callee ValueInfo
// to the FunctionSummary of its first summary-list entry, if any.
2698IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
2700 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
2701 if (
Callee.getSummaryList().empty())
2703 return dyn_cast<FunctionSummary>(
Callee.getSummaryList()[0]->getBaseObject());
// calleeMatchesFunc (index flavor): compare the callsite's callee VI with
// Func's VI (via FSToVIMap), considering aliases, then fall back to the
// tail-call chain search with a debug message on failure.
2706bool IndexCallsiteContextGraph::calleeMatchesFunc(
2709 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2711 dyn_cast_if_present<CallsiteInfo *>(
Call.getBase())->Callee;
2715 Callee.getSummaryList().empty()
2717 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2718 assert(FSToVIMap.count(Func));
2719 auto FuncVI = FSToVIMap[
Func];
2720 if (Callee == FuncVI ||
2735 bool FoundMultipleCalleeChains =
false;
2736 if (!findProfiledCalleeThroughTailCalls(
2737 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2738 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2739 <<
" from " << FSToVIMap[CallerFunc]
2740 <<
" that actually called " << Callee
2741 << (FoundMultipleCalleeChains
2742 ?
" (found multiple possible chains)"
2745 if (FoundMultipleCalleeChains)
2746 FoundProfiledCalleeNonUniquelyCount++;
// sameCallee (index flavor): two summary callsites match if their callee
// ValueInfos are identical.
2753bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2755 dyn_cast_if_present<CallsiteInfo *>(Call1.getBase())->Callee;
2757 dyn_cast_if_present<CallsiteInfo *>(Call2.getBase())->Callee;
2758 return Callee1 == Callee2;
// --- Debug printing ----------------------------------------------------
// dump()/print() for nodes, edges, and the whole graph, plus the total-size
// reporting and the verification walk. These only read the graph.
// NOTE(review): extraction artifact — various stream operands and loop/
// brace closers are missing throughout this section.
2761template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
// ContextNode::dump: convenience wrapper that prints to dbgs().
2762void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// ContextNode::print: node address, call info, matching calls, sorted
// context ids, then callee and caller edge lists and clone relations.
2768template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2769void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2771 OS <<
"Node " <<
this <<
"\n";
2775 OS <<
" (recursive)";
2777 if (!MatchingCalls.
empty()) {
2778 OS <<
"\tMatchingCalls:\n";
2779 for (
auto &MatchingCall : MatchingCalls) {
2781 MatchingCall.print(
OS);
2786 OS <<
"\tContextIds:";
// Sort ids so output is deterministic across runs.
2788 auto ContextIds = getContextIds();
2789 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2790 std::sort(SortedIds.begin(), SortedIds.end());
2791 for (
auto Id : SortedIds)
2794 OS <<
"\tCalleeEdges:\n";
2795 for (
auto &Edge : CalleeEdges)
2796 OS <<
"\t\t" << *Edge <<
"\n";
2797 OS <<
"\tCallerEdges:\n";
2798 for (
auto &Edge : CallerEdges)
2799 OS <<
"\t\t" << *Edge <<
"\n";
2800 if (!Clones.empty()) {
2803 for (
auto *Clone : Clones)
2806 }
else if (CloneOf) {
2807 OS <<
"\tClone of " << CloneOf <<
"\n";
// ContextEdge::dump / print: edge endpoints plus its sorted context ids.
2811template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2812void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
2818template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2819void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2823 OS <<
" ContextIds:";
2824 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2825 std::sort(SortedIds.begin(), SortedIds.end());
2826 for (
auto Id : SortedIds)
// Graph-level dump/print: iterate all nodes via GraphTraits, skipping
// removed nodes.
2830template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2831void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
2835template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2836void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2838 OS <<
"Callsite Context Graph:\n";
2839 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2840 for (
const auto Node : nodes<GraphType>(
this)) {
2841 if (
Node->isRemoved())
// printTotalSizes: for each live allocation node, report per-context
// size info ("MemProf hinting: ..." lines) with the allocation type that
// was actually applied to the call.
2848template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2849void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2851 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2852 for (
const auto Node : nodes<GraphType>(
this)) {
2853 if (
Node->isRemoved())
2855 if (!
Node->IsAllocation)
2858 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
2859 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2860 std::sort(SortedIds.begin(), SortedIds.end());
2861 for (
auto Id : SortedIds) {
2862 auto TypeI = ContextIdToAllocationType.find(Id);
2863 assert(TypeI != ContextIdToAllocationType.end());
2864 auto CSI = ContextIdToContextSizeInfos.find(Id);
2865 if (CSI != ContextIdToContextSizeInfos.end()) {
2866 for (
auto &Info : CSI->second) {
2867 OS <<
"MemProf hinting: "
2869 <<
" full allocation context " <<
Info.FullStackId
2870 <<
" with total size " <<
Info.TotalSize <<
" is "
// Note when the hint applied differs from the context's profiled type
// (e.g. overridden by the cold byte percent threshold).
2872 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
2874 <<
" due to cold byte percent";
// check(): run checkNode/checkEdge over the whole graph (used when the
// memprof-verify-ccg option is enabled).
2882template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2883void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2884 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2885 for (
const auto Node : nodes<GraphType>(
this)) {
2886 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2887 for (
auto &Edge :
Node->CallerEdges)
2888 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
// --- Graph traits ------------------------------------------------------
// GraphTraits specialization so LLVM's generic graph utilities (nodes<>,
// GraphWriter, etc.) can walk a CallsiteContextGraph; DOTGraphTraits
// supplies labels, tooltips, and colors for exportToDot.
// NOTE(review): extraction artifact — many member function signatures and
// struct headers in this section are missing from this copy.
2892template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2894 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2895 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2897 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
// Entry node: first node in the graph's owning container.
2913 return G->NodeOwner.begin()->get();
2916 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2917 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
// Child iterator maps a callee edge to its callee node via GetCallee.
2925 decltype(&GetCallee)>;
2936template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2941 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Node label: original stack/alloc id, then the call label (or markers
// for null call / recursive / external nodes).
2947 std::string LabelString =
2948 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2949 Twine(Node->OrigStackOrAllocId))
2951 LabelString +=
"\n";
2952 if (Node->hasCall()) {
2953 auto Func =
G->NodeToCallingFunc.find(Node);
2954 assert(Func !=
G->NodeToCallingFunc.end());
2956 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2958 LabelString +=
"null call";
2959 if (Node->Recursive)
2960 LabelString +=
" (recursive)";
2962 LabelString +=
" (external)";
// Node attributes: tooltip with context ids, fill color by alloc type,
// and a dashed blue border for clone nodes.
2968 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(Node) +
" " +
2969 getContextIds(Node->getContextIds()) +
"\"")
2972 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2973 AttributeString +=
",style=\"filled\"";
2974 if (Node->CloneOf) {
2975 AttributeString +=
",color=\"blue\"";
2976 AttributeString +=
",style=\"filled,bold,dashed\"";
2978 AttributeString +=
",style=\"filled\"";
2979 return AttributeString;
// Edge attributes mirror the node scheme: tooltip = context ids, color by
// the edge's alloc types.
2984 auto &Edge = *(ChildIter.getCurrent());
2985 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2986 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
// Removed nodes are hidden from the DOT output.
2993 return Node->isRemoved();
// getContextIds: render a bounded list of ids; above 100 ids only the
// count is printed to keep labels readable.
2998 std::string IdString =
"ContextIds:";
2999 if (ContextIds.
size() < 100) {
3000 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3001 std::sort(SortedIds.begin(), SortedIds.end());
3002 for (
auto Id : SortedIds)
3003 IdString += (
" " +
Twine(Id)).str();
3005 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
// getColor: map alloc-type bitmask to a fill color (fallback shown here;
// the per-type cases are missing from this extraction).
3010 static std::string getColor(
uint8_t AllocTypes) {
3019 return "mediumorchid1";
// getNodeId: stable DOT identifier derived from the node's address.
3023 static std::string getNodeId(NodeRef
Node) {
3024 std::stringstream SStream;
3025 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
3026 std::string
Result = SStream.str();
// exportToDot: write the graph to a DOT file whose name combines the
// MemProfDotFilePathPrefix option with the given stage Label.
3031template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3032void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3033 std::string Label)
const {
// moveEdgeToNewCalleeClone: clone the edge's callee node and redirect the
// given caller edge (and optionally a subset of its context ids) to the
// fresh clone; delegates the edge surgery to moveEdgeToExistingCalleeClone.
3038template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3039typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3040CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3041 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
3043 ContextNode *
Node = Edge->Callee;
// New clone shares the callee's function, call, and allocation-ness.
3045 ContextNode *Clone =
3046 createNewNode(
Node->IsAllocation, NodeToCallingFunc[
Node],
Node->Call);
3047 Node->addClone(Clone);
3048 Clone->MatchingCalls =
Node->MatchingCalls;
// NewClone=true tells the callee-move logic the clone starts empty.
3049 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true,
// Move a caller edge (all of it, or just ContextIdsToMove) from its
// current callee onto NewCallee (a clone of the same original node),
// merging with any existing edge between the same endpoints and keeping
// the callee edges of the old node consistent by splitting their context
// ids between old and new callee.
// NOTE(review): extraction artifact — several branch/else lines, the
// context-id set_subtract on the moved edge, and the per-callee-edge
// EdgeContextIdsToMove computation are missing from this copy.
3054template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3055void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3056 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
3057 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
// Only clones of the same original node may receive the edge.
3062 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3064 ContextNode *OldCallee = Edge->Callee;
// If the caller already has an edge to NewCallee we merge into it.
3068 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
// Empty set means "move the entire edge".
3072 if (ContextIdsToMove.
empty())
3073 ContextIdsToMove = Edge->getContextIds();
// Case 1: moving all of the edge's ids — transfer or merge the edge whole.
3077 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3080 NewCallee->AllocTypes |= Edge->AllocTypes;
3082 if (ExistingEdgeToNewCallee) {
3085 ExistingEdgeToNewCallee->getContextIds().
insert(ContextIdsToMove.
begin(),
3086 ContextIdsToMove.
end());
3087 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3088 assert(Edge->ContextIds == ContextIdsToMove);
3089 removeEdgeFromGraph(Edge.get(), CallerEdgeI,
false);
// No existing edge: retarget this edge at the new callee in place.
3092 Edge->Callee = NewCallee;
3093 NewCallee->CallerEdges.push_back(Edge);
3096 *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
3098 OldCallee->eraseCallerEdge(Edge.get());
// Case 2: moving a subset — leave the original edge (with reduced ids)
// and add/merge an edge carrying just the moved subset.
3107 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3108 if (ExistingEdgeToNewCallee) {
3111 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
3112 ContextIdsToMove.
end());
3113 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3116 auto NewEdge = std::make_shared<ContextEdge>(
3117 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3118 Edge->Caller->CalleeEdges.push_back(NewEdge);
3119 NewCallee->CallerEdges.push_back(NewEdge);
3123 NewCallee->AllocTypes |= CallerEdgeAllocType;
// Recompute the original edge's types from its remaining ids.
3125 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
// Redistribute the old callee's own callee edges: ids that moved follow
// the moved contexts to NewCallee.
3130 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3135 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3136 OldCalleeEdge->AllocTypes =
3137 computeAllocType(OldCalleeEdge->getContextIds());
3144 if (
auto *NewCalleeEdge =
3145 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
3146 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3147 EdgeContextIdsToMove.
end());
3148 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3152 auto NewEdge = std::make_shared<ContextEdge>(
3153 OldCalleeEdge->Callee, NewCallee,
3154 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3155 NewCallee->CalleeEdges.push_back(NewEdge);
3156 NewEdge->Callee->CallerEdges.push_back(NewEdge);
// Old callee's aggregate type must reflect its remaining contexts.
3160 OldCallee->AllocTypes = OldCallee->computeAllocType();
3163 OldCallee->emptyContextIds());
// Optional consistency checks over both nodes and their callees.
3165 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
3166 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
3167 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3168 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3170 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3171 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
// Move a callee edge from its current caller to NewCaller, merging with an
// existing parallel edge if present, then rebalance the old caller's own
// caller edges so the moved context ids follow the edge to NewCaller.
// NOTE(review): extraction artifact — the EdgeContextIdsToMove computation
// and some branch/else lines are missing from this copy.
3176template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3177void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3178 moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller) {
3179 auto Edge = *CalleeEdgeI;
3181 ContextNode *OldCaller = Edge->Caller;
3185 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
// Detach from the old caller first; erase returns the next iterator.
3187 CalleeEdgeI = OldCaller->CalleeEdges.erase(CalleeEdgeI);
3188 if (ExistingEdgeToNewCaller) {
// Merge into the existing edge and retire this one.
3191 ExistingEdgeToNewCaller->getContextIds().insert(
3192 Edge->getContextIds().begin(), Edge->getContextIds().end());
3193 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3194 Edge->ContextIds.clear();
3196 Edge->Callee->eraseCallerEdge(Edge.get());
// Otherwise re-point the edge at the new caller.
3199 Edge->Caller = NewCaller;
3200 NewCaller->CalleeEdges.push_back(Edge);
3205 NewCaller->AllocTypes |= Edge->AllocTypes;
// A brand-new caller node has no caller edges yet; that relaxes the
// assertion on ExistingCallerEdge below.
3212 bool IsNewNode = NewCaller->CallerEdges.empty();
// Move the relevant context ids off each of the old caller's caller
// edges and onto (possibly new) edges into NewCaller.
3214 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3219 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3220 OldCallerEdge->AllocTypes =
3221 computeAllocType(OldCallerEdge->getContextIds());
3226 auto *ExistingCallerEdge =
3227 NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3228 assert(IsNewNode || ExistingCallerEdge);
3229 if (ExistingCallerEdge) {
3230 ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3231 EdgeContextIdsToMove.
end());
3232 ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3235 auto NewEdge = std::make_shared<ContextEdge>(
3236 NewCaller, OldCallerEdge->Caller,
3237 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3238 NewCaller->CallerEdges.push_back(NewEdge);
3239 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
// Recompute the old caller's aggregate type from its remaining contexts.
3243 OldCaller->AllocTypes = OldCaller->computeAllocType();
3246 OldCaller->emptyContextIds());
// Optional consistency checks over both callers and their neighbors.
3248 checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller,
false);
3249 checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller,
false);
3250 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3251 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3253 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3254 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
// --- Cloning -----------------------------------------------------------
// recursivelyRemoveNoneTypeCalleeEdges: post-cloning cleanup pass that
// strips None-type callee edges from a node, its clones, and (via the
// Visited set) everything reachable through its caller edges.
// identifyClones (driver + worker): the core cloning analysis, walking up
// from each allocation and splitting nodes whose caller edges demand a
// different allocation type than the node currently has.
// NOTE(review): extraction artifact — visited-set insertions, assertions,
// early continues, and parts of the clone-reuse logic are missing.
3259template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3260void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3261 recursivelyRemoveNoneTypeCalleeEdges(
3267 removeNoneTypeCalleeEdges(
Node);
3269 for (
auto *Clone :
Node->Clones)
3270 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate over a copy: the recursive calls can mutate CallerEdges.
3274 auto CallerEdges =
Node->CallerEdges;
3275 for (
auto &Edge : CallerEdges) {
3277 if (Edge->isRemoved()) {
3281 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
// Driver: run the cloning analysis from every allocation node, then do
// the None-type edge cleanup with a fresh visited set.
3285template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3286void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3288 for (
auto &Entry : AllocationCallToContextNodeMap) {
3290 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3293 for (
auto &Entry : AllocationCallToContextNodeMap)
3294 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// Worker: depth-first up the caller edges; on the way back down, decide
// per caller edge whether this node must be cloned for that edge's
// allocation type, reusing an existing compatible clone when possible.
3307template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3308void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3312 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// Nodes without a call cannot be cloned in the IR/summary anyway.
3320 if (!
Node->hasCall())
// Recurse to callers first (copy: recursion may mutate CallerEdges).
3335 auto CallerEdges =
Node->CallerEdges;
3336 for (
auto &Edge : CallerEdges) {
3338 if (Edge->isRemoved()) {
3343 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
3344 identifyClones(Edge->Caller, Visited, AllocContextIds);
// Process caller edges in a deterministic priority order so cold-only
// edges are peeled off before mixed ones.
3367 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3370 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
3371 [&](
const std::shared_ptr<ContextEdge> &
A,
3372 const std::shared_ptr<ContextEdge> &
B) {
// Edges already emptied sort consistently to one end.
3375 if (A->ContextIds.empty())
3381 if (B->ContextIds.empty())
// Tie-break equal types by smallest context id for determinism.
3384 if (A->AllocTypes == B->AllocTypes)
3387 return *A->ContextIds.begin() < *B->ContextIds.begin();
3388 return AllocTypeCloningPriority[A->AllocTypes] <
3389 AllocTypeCloningPriority[B->AllocTypes];
// Context ids seen on more than one caller edge indicate recursion;
// they are excluded from per-edge cloning decisions below.
3399 for (
auto &CE :
Node->CallerEdges) {
3402 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3403 for (
auto Id :
CE->getContextIds())
3404 if (!AllCallerContextIds.
insert(Id).second)
3405 RecursiveContextIds.
insert(Id);
// Main per-caller-edge cloning loop; EI is advanced/adjusted by the
// edge-moving helpers.
3414 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
3415 auto CallerEdge = *EI;
// Callers with no call can't be cloned; skip their edges.
3424 if (!CallerEdge->Caller->hasCall()) {
// Restrict to the contexts relevant to the allocation being analyzed,
// minus any recursive ids.
3431 auto CallerEdgeContextsForAlloc =
3433 if (!RecursiveContextIds.
empty())
3434 CallerEdgeContextsForAlloc =
3436 if (CallerEdgeContextsForAlloc.empty()) {
3440 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Compute, per callee edge, the alloc type of the contexts this caller
// edge contributes — used to check clone compatibility below.
3444 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3445 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3446 for (
auto &CalleeEdge :
Node->CalleeEdges)
3447 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3448 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// If this caller edge already agrees with the node's type (and with the
// per-callee-edge breakdown), no clone is needed for it.
3463 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
3464 allocTypeToUse(
Node->AllocTypes) &&
3465 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3466 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Look for an existing clone with a compatible allocation type.
3473 ContextNode *Clone =
nullptr;
3474 for (
auto *CurClone :
Node->Clones) {
3475 if (allocTypeToUse(CurClone->AllocTypes) !=
3476 allocTypeToUse(CallerAllocTypeForAlloc))
3483 assert(!BothSingleAlloc ||
3484 CurClone->AllocTypes == CallerAllocTypeForAlloc);
// Single-type matches are always reusable; otherwise require the full
// per-callee-edge type pattern to match the clone.
3490 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3491 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
// Move this caller edge (restricted to the alloc's contexts) to the
// chosen existing clone, or spawn a new clone for it.
3499 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI,
false,
3500 CallerEdgeContextsForAlloc);
3503 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
3518 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// --- Applying the cloning decisions ------------------------------------
// Module flavor: annotate the allocation call with a "memprof" fn
// attribute ("cold"/"notcold") and emit an optimization remark.
// NOTE(review): extraction artifact — the AllocTypeString computation and
// remark boilerplate lines are missing from these copies.
3521void ModuleCallsiteContextGraph::updateAllocationCall(
3525 "memprof", AllocTypeString);
3526 cast<CallBase>(
Call.call())->addFnAttr(
A);
3527 OREGetter(
Call.call()->getFunction())
3529 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3531 <<
" marked with memprof allocation attribute "
3532 <<
ore::NV(
"Attribute", AllocTypeString));
// Index flavor: record the chosen allocation type in the AllocInfo's
// Versions array slot for this clone.
3535void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
3539 assert(AI->Versions.size() >
Call.cloneNo());
// Module flavor of getAllocationCallType: read the "memprof" fn attribute
// back off the call ("cold" vs anything else).
3544ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3545 const auto *CB = cast<CallBase>(
Call.call());
3546 if (!CB->getAttributes().hasFnAttr(
"memprof"))
3548 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
// Index flavor: read the recorded version for this clone.
3554IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3556 assert(AI->Versions.size() >
Call.cloneNo());
// Module updateCall: redirect the caller's call to the assigned function
// clone (clone 0 keeps the original callee) and emit a remark.
3560void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3561 FuncInfo CalleeFunc) {
3562 if (CalleeFunc.cloneNo() > 0)
3563 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
3564 OREGetter(CallerCall.call()->getFunction())
3566 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
3567 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
3568 <<
" assigned to call function clone "
3569 <<
ore::NV(
"Callee", CalleeFunc.func()));
// Index updateCall: record the callee clone number in the CallsiteInfo's
// Clones slot for this caller clone; allocations never reach here.
3572void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3573 FuncInfo CalleeFunc) {
3574 auto *CI = CallerCall.call().dyn_cast<
CallsiteInfo *>();
3576 "Caller cannot be an allocation which should not have profiled calls");
3577 assert(CI->Clones.size() > CallerCall.cloneNo());
3578 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
3581CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
3583ModuleCallsiteContextGraph::cloneFunctionForCallsite(
3584 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3585 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3591 NewFunc->setName(
Name);
3592 for (
auto &Inst : CallsWithMetadataInFunc) {
3594 assert(Inst.cloneNo() == 0);
3595 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
3597 OREGetter(
Func.func())
3599 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
3600 return {NewFunc, CloneNo};
3604 IndexCall>::FuncInfo
3605IndexCallsiteContextGraph::cloneFunctionForCallsite(
3606 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3607 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3622 for (
auto &Inst : CallsWithMetadataInFunc) {
3624 assert(Inst.cloneNo() == 0);
3625 if (
auto *AI = Inst.call().dyn_cast<
AllocInfo *>()) {
3626 assert(AI->Versions.size() == CloneNo);
3629 AI->Versions.push_back(0);
3632 assert(CI && CI->Clones.size() == CloneNo);
3635 CI->Clones.push_back(0);
3637 CallMap[Inst] = {Inst.call(), CloneNo};
3639 return {
Func.func(), CloneNo};
3673template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3674bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3675 bool Changed =
false;
3683 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
3684 const FuncInfo &CalleeFunc) {
3686 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
3691 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
3692 FuncInfo OrigFunc(Func);
3696 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
3697 for (
auto &Call : CallsWithMetadata) {
3698 ContextNode *
Node = getNodeForInst(Call);
3705 "Not having a call should have prevented cloning");
3709 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
3713 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
3715 ContextNode *CallsiteClone,
3718 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3720 assert(FuncClonesToCallMap.count(FuncClone));
3721 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3723 if (CallMap.count(Call))
3724 CallClone = CallMap[
Call];
3725 CallsiteClone->setCall(CallClone);
3727 for (
auto &MatchingCall :
Node->MatchingCalls) {
3729 if (CallMap.count(MatchingCall))
3730 CallClone = CallMap[MatchingCall];
3732 MatchingCall = CallClone;
3739 std::deque<ContextNode *> ClonesWorklist;
3741 if (!
Node->emptyContextIds())
3742 ClonesWorklist.push_back(
Node);
3743 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
3744 Node->Clones.end());
3749 unsigned NodeCloneCount = 0;
3750 while (!ClonesWorklist.empty()) {
3751 ContextNode *Clone = ClonesWorklist.front();
3752 ClonesWorklist.pop_front();
3755 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3761 if (FuncClonesToCallMap.size() < NodeCloneCount) {
3763 if (NodeCloneCount == 1) {
3768 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3769 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3773 FuncClonesToCallMap[OrigFunc] = {};
3774 AssignCallsiteCloneToFuncClone(
3775 OrigFunc, Call, Clone,
3776 AllocationCallToContextNodeMap.count(Call));
3777 for (
auto &CE : Clone->CallerEdges) {
3779 if (!
CE->Caller->hasCall())
3781 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
3791 FuncInfo PreviousAssignedFuncClone;
3793 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3794 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3796 bool CallerAssignedToCloneOfFunc =
false;
3797 if (EI != Clone->CallerEdges.end()) {
3798 const std::shared_ptr<ContextEdge> &Edge = *EI;
3799 PreviousAssignedFuncClone =
3800 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3801 CallerAssignedToCloneOfFunc =
true;
3806 std::map<CallInfo, CallInfo> NewCallMap;
3807 unsigned CloneNo = FuncClonesToCallMap.size();
3808 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3809 "should already exist in the map");
3810 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3811 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3812 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3813 FunctionClonesAnalysis++;
3819 if (!CallerAssignedToCloneOfFunc) {
3820 AssignCallsiteCloneToFuncClone(
3821 NewFuncClone, Call, Clone,
3822 AllocationCallToContextNodeMap.count(Call));
3823 for (
auto &CE : Clone->CallerEdges) {
3825 if (!
CE->Caller->hasCall())
3827 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3839 auto CallerEdges = Clone->CallerEdges;
3840 for (
auto CE : CallerEdges) {
3842 if (
CE->isRemoved()) {
3848 if (!
CE->Caller->hasCall())
3851 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3855 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3856 PreviousAssignedFuncClone)
3859 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3872 auto CalleeEdges =
CE->Caller->CalleeEdges;
3873 for (
auto CalleeEdge : CalleeEdges) {
3876 if (CalleeEdge->isRemoved()) {
3881 ContextNode *
Callee = CalleeEdge->Callee;
3885 if (Callee == Clone || !
Callee->hasCall())
3887 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3888 removeNoneTypeCalleeEdges(NewClone);
3891 removeNoneTypeCalleeEdges(Callee);
3896 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3897 RecordCalleeFuncOfCallsite(
3898 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
3907 OrigCall.setCloneNo(0);
3908 std::map<CallInfo, CallInfo> &CallMap =
3909 FuncClonesToCallMap[NewFuncClone];
3910 assert(CallMap.count(OrigCall));
3911 CallInfo NewCall(CallMap[OrigCall]);
3913 NewClone->setCall(NewCall);
3915 for (
auto &MatchingCall : NewClone->MatchingCalls) {
3916 CallInfo OrigMatchingCall(MatchingCall);
3917 OrigMatchingCall.setCloneNo(0);
3918 assert(CallMap.count(OrigMatchingCall));
3919 CallInfo NewCall(CallMap[OrigMatchingCall]);
3922 MatchingCall = NewCall;
3945 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3946 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3949 for (
auto EI = Clone->CallerEdges.begin();
3950 EI != Clone->CallerEdges.end();) {
3953 if (!Edge->Caller->hasCall()) {
3959 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3960 FuncInfo FuncCloneCalledByCaller =
3961 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3971 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3972 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3980 (FuncCloneAssignedToCurCallsiteClone &&
3981 FuncCloneAssignedToCurCallsiteClone !=
3982 FuncCloneCalledByCaller)) {
3997 if (FuncCloneToNewCallsiteCloneMap.count(
3998 FuncCloneCalledByCaller)) {
3999 ContextNode *NewClone =
4000 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4001 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
4003 removeNoneTypeCalleeEdges(NewClone);
4006 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
4007 removeNoneTypeCalleeEdges(NewClone);
4008 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4011 ClonesWorklist.push_back(NewClone);
4012 assert(EI == Clone->CallerEdges.end() ||
4018 removeNoneTypeCalleeEdges(Clone);
4027 if (!FuncCloneAssignedToCurCallsiteClone) {
4028 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4030 AssignCallsiteCloneToFuncClone(
4031 FuncCloneCalledByCaller, Call, Clone,
4032 AllocationCallToContextNodeMap.count(Call));
4036 assert(FuncCloneAssignedToCurCallsiteClone ==
4037 FuncCloneCalledByCaller);
4046 if (!FuncCloneAssignedToCurCallsiteClone) {
4051 for (
auto &CF : FuncClonesToCallMap) {
4052 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
4053 FuncCloneAssignedToCurCallsiteClone = CF.first;
4057 assert(FuncCloneAssignedToCurCallsiteClone);
4059 AssignCallsiteCloneToFuncClone(
4060 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4061 AllocationCallToContextNodeMap.count(Call));
4063 assert(FuncCloneToCurNodeCloneMap
4064 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4066 RecordCalleeFuncOfCallsite(Edge->Caller,
4067 FuncCloneAssignedToCurCallsiteClone);
4074 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
4075 for (
const auto &PE :
Node->CalleeEdges)
4076 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4077 for (
const auto &CE :
Node->CallerEdges)
4078 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4079 for (
auto *Clone :
Node->Clones) {
4080 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4081 for (
const auto &PE : Clone->CalleeEdges)
4082 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4083 for (
const auto &CE : Clone->CallerEdges)
4084 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4093 auto UpdateCalls = [&](ContextNode *
Node,
4095 auto &&UpdateCalls) {
4100 for (
auto *Clone :
Node->Clones)
4101 UpdateCalls(Clone, Visited, UpdateCalls);
4103 for (
auto &Edge :
Node->CallerEdges)
4104 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
4108 if (!
Node->hasCall() ||
Node->emptyContextIds())
4111 if (
Node->IsAllocation) {
4112 auto AT = allocTypeToUse(
Node->AllocTypes);
4118 !ContextIdToContextSizeInfos.empty()) {
4121 for (
auto Id :
Node->getContextIds()) {
4122 auto TypeI = ContextIdToAllocationType.find(Id);
4123 assert(TypeI != ContextIdToAllocationType.end());
4124 auto CSI = ContextIdToContextSizeInfos.find(Id);
4125 if (CSI != ContextIdToContextSizeInfos.end()) {
4126 for (
auto &Info : CSI->second) {
4129 TotalCold +=
Info.TotalSize;
4136 updateAllocationCall(
Node->Call, AT);
4141 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
4144 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
4145 updateCall(
Node->Call, CalleeFunc);
4147 for (
auto &Call :
Node->MatchingCalls)
4148 updateCall(Call, CalleeFunc);
4157 for (
auto &Entry : AllocationCallToContextNodeMap)
4158 UpdateCalls(
Entry.second, Visited, UpdateCalls);
4172 FunctionsClonedThinBackend++;
4173 for (
unsigned I = 1;
I < NumClones;
I++) {
4174 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
4176 FunctionClonesThinBackend++;
4179 for (
auto &BB : *NewF) {
4180 for (
auto &Inst : BB) {
4181 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
4182 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
4186 auto *PrevF = M.getFunction(
Name);
4190 assert(PrevF->isDeclaration());
4191 NewF->takeName(PrevF);
4192 PrevF->replaceAllUsesWith(NewF);
4193 PrevF->eraseFromParent();
4195 NewF->setName(
Name);
4197 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
4200 if (!FuncToAliasMap.count(&
F))
4202 for (
auto *
A : FuncToAliasMap[&
F]) {
4204 auto *PrevA = M.getNamedAlias(
Name);
4206 A->getType()->getPointerAddressSpace(),
4207 A->getLinkage(),
Name, NewF);
4208 NewA->copyAttributesFrom(
A);
4212 assert(PrevA->isDeclaration());
4213 NewA->takeName(PrevA);
4214 PrevA->replaceAllUsesWith(NewA);
4215 PrevA->eraseFromParent();
4257bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
4259 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
4260 Symtab = std::make_unique<InstrProfSymtab>();
4271 if (
Error E = Symtab->create(M,
true,
false)) {
4272 std::string SymtabFailure =
toString(std::move(
E));
4273 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
4279bool MemProfContextDisambiguation::applyImport(
Module &M) {
4281 bool Changed =
false;
4286 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
4288 for (
auto &
A :
M.aliases()) {
4289 auto *Aliasee =
A.getAliaseeObject();
4290 if (
auto *
F = dyn_cast<Function>(Aliasee))
4291 FuncToAliasMap[
F].insert(&
A);
4294 if (!initializeIndirectCallPromotionInfo(M))
4304 bool ClonesCreated =
false;
4305 unsigned NumClonesCreated = 0;
4306 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
4316 if (ClonesCreated) {
4317 assert(NumClonesCreated == NumClones);
4324 ClonesCreated =
true;
4325 NumClonesCreated = NumClones;
4331 CloneFuncIfNeeded(
StackNode.Clones.size());
4338 auto CalleeOrigName = CalledFunction->getName();
4339 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
4344 auto NewF =
M.getOrInsertFunction(
4346 CalledFunction->getFunctionType());
4352 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4355 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4357 <<
" assigned to call function clone "
4358 <<
ore::NV(
"Callee", NewF.getCallee()));
4376 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
4378 "enable-import-metadata is needed to emit thinlto_src_module");
4380 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
4382 if (GVS->modulePath() == SrcModule) {
4383 GVSummary = GVS.get();
4387 assert(GVSummary && GVSummary->modulePath() == SrcModule);
4392 if (isa<AliasSummary>(GVSummary))
4395 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
4397 if (
FS->allocs().empty() &&
FS->callsites().empty())
4400 auto SI =
FS->callsites().begin();
4401 auto AI =
FS->allocs().begin();
4409 for (
auto CallsiteIt =
FS->callsites().rbegin();
4410 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
4411 auto &Callsite = *CallsiteIt;
4415 if (!Callsite.StackIdIndices.empty())
4417 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
4426 for (
auto &BB :
F) {
4427 for (
auto &
I : BB) {
4428 auto *CB = dyn_cast<CallBase>(&
I);
4433 auto *CalledValue = CB->getCalledOperand();
4434 auto *CalledFunction = CB->getCalledFunction();
4435 if (CalledValue && !CalledFunction) {
4436 CalledValue = CalledValue->stripPointerCasts();
4438 CalledFunction = dyn_cast<Function>(CalledValue);
4442 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
4443 assert(!CalledFunction &&
4444 "Expected null called function in callsite for alias");
4445 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
4449 I.getMetadata(LLVMContext::MD_callsite));
4450 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
4454 if (CB->getAttributes().hasFnAttr(
"memprof")) {
4456 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4457 ? AllocTypeColdThinBackend++
4458 : AllocTypeNotColdThinBackend++;
4459 OrigAllocsThinBackend++;
4460 AllocVersionsThinBackend++;
4461 if (!MaxAllocVersionsThinBackend)
4462 MaxAllocVersionsThinBackend = 1;
4469 auto &AllocNode = *(AI++);
4473 auto MIBIter = AllocNode.MIBs.begin();
4474 for (
auto &MDOp : MemProfMD->operands()) {
4475 assert(MIBIter != AllocNode.MIBs.end());
4477 MIBIter->StackIdIndices.begin();
4478 auto *MIBMD = cast<const MDNode>(MDOp);
4482 auto ContextIterBegin =
4486 (ContextIterBegin != StackContext.
end() &&
4487 *ContextIterBegin == 0)
4490 for (
auto ContextIter = ContextIterBegin;
4491 ContextIter != StackContext.
end(); ++ContextIter) {
4495 if (LastStackContextId == *ContextIter)
4497 LastStackContextId = *ContextIter;
4498 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
4507 CloneFuncIfNeeded(AllocNode.Versions.size());
4509 OrigAllocsThinBackend++;
4510 AllocVersionsThinBackend += AllocNode.Versions.size();
4511 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
4512 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
4522 if (AllocNode.Versions.size() == 1 &&
4528 UnclonableAllocsThinBackend++;
4534 return Type == ((uint8_t)AllocationType::NotCold |
4535 (uint8_t)AllocationType::Cold);
4539 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
4545 : AllocTypeNotColdThinBackend++;
4557 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4560 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
4562 <<
" marked with memprof allocation attribute "
4563 <<
ore::NV(
"Attribute", AllocTypeString));
4565 }
else if (!CallsiteContext.empty()) {
4566 if (!CalledFunction) {
4569 auto *CI = dyn_cast<CallInst>(CB);
4570 assert(!CI || !CI->isInlineAsm());
4573 assert(CalledValue && !isa<Constant>(CalledValue));
4580 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
4586 CloneFuncIfNeeded(NumClones);
4591 assert(SI !=
FS->callsites().end());
4597 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
4598 for (
auto StackId : CallsiteContext) {
4606 CloneCallsite(
StackNode, CB, CalledFunction);
4608 }
else if (CB->isTailCall() && CalledFunction) {
4613 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
4614 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
4615 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
4616 CloneCallsite(Callsite->second, CB, CalledFunction);
4623 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
4633 for (
auto &BB :
F) {
4634 for (
auto &
I : BB) {
4635 if (!isa<CallBase>(
I))
4637 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
4638 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
4646unsigned MemProfContextDisambiguation::recordICPInfo(
4653 auto CandidateProfileData =
4654 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
4656 if (CandidateProfileData.empty())
4662 bool ICPNeeded =
false;
4663 unsigned NumClones = 0;
4664 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
4665 for (
const auto &Candidate : CandidateProfileData) {
4667 auto CalleeValueInfo =
4672 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
4679 [](
unsigned CloneNo) { return CloneNo != 0; });
4689 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
4690 TotalCount, CallsiteInfoStartIndex});
4694void MemProfContextDisambiguation::performICP(
4696 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
4705 for (
auto &
Info : ICallAnalysisInfo) {
4707 auto CallsiteIndex =
Info.CallsiteInfoStartIndex;
4708 auto TotalCount =
Info.TotalCount;
4709 unsigned NumPromoted = 0;
4710 unsigned NumClones = 0;
4712 for (
auto &Candidate :
Info.CandidateProfileData) {
4713 auto &
StackNode = AllCallsites[CallsiteIndex++];
4723 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4724 if (TargetFunction ==
nullptr ||
4733 <<
"Memprof cannot promote indirect call: target with md5sum "
4734 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
4743 const char *Reason =
nullptr;
4747 <<
"Memprof cannot promote indirect call to "
4748 <<
ore::NV(
"TargetFunction", TargetFunction)
4749 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
4761 for (
unsigned J = 0; J < NumClones; J++) {
4764 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4770 TotalCount, isSamplePGO, &ORE);
4771 auto *TargetToUse = TargetFunction;
4776 cast<Function>(
M.getOrInsertFunction(
4784 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4786 <<
" promoted and assigned to call function clone "
4787 <<
ore::NV(
"Callee", TargetToUse));
4791 TotalCount -= Candidate.Count;
4797 for (
unsigned J = 0; J < NumClones; J++) {
4800 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4802 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
4805 if (TotalCount != 0)
4808 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
4813template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4814bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
4816 dbgs() <<
"CCG before cloning:\n";
4820 exportToDot(
"postbuild");
4833 dbgs() <<
"CCG after cloning:\n";
4837 exportToDot(
"cloned");
4839 bool Changed = assignFunctions();
4842 dbgs() <<
"CCG after assigning function clones:\n";
4846 exportToDot(
"clonefuncassign");
4849 printTotalSizes(
errs());
4854bool MemProfContextDisambiguation::processModule(
4861 return applyImport(M);
4874 ModuleCallsiteContextGraph CCG(M, OREGetter);
4875 return CCG.process();
4880 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
4881 if (ImportSummary) {
4891 auto ReadSummaryFile =
4893 if (!ReadSummaryFile) {
4900 if (!ImportSummaryForTestingOrErr) {
4906 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
4907 ImportSummary = ImportSummaryForTesting.get();
4916 if (!processModule(M, OREGetter))
4933 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static bool isMemProfClone(const Function &F)
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
cl::opt< unsigned > MinClonedColdBytePercent
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Lightweight error class with error context and mandatory checking.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>'s and is nicer to use.
cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst with the value profile meta data.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
const char * toString(DWARFSectionKind Kind)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summary analysis and the ThinLTO backend processing.
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits functions.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const