21 using namespace llvm::bfi_detail;
23 #define DEBUG_TYPE "block-freq"
41 for (
int Digits = 0; Digits < 16; ++Digits)
74 struct DitheringDistributer {
78 DitheringDistributer(Distribution &Dist,
const BlockMass &Mass);
85 DitheringDistributer::DitheringDistributer(Distribution &Dist,
88 RemWeight = Dist.Total;
93 assert(Weight &&
"invalid weight");
94 assert(Weight <= RemWeight);
104 Weight::DistType
Type) {
105 assert(Amount &&
"invalid weight of 0");
106 uint64_t NewTotal = Total + Amount;
109 bool IsOverflow = NewTotal < Total;
110 assert(!(DidOverflow && IsOverflow) &&
"unexpected repeated overflow");
111 DidOverflow |= IsOverflow;
117 Weights.push_back(Weight(Type, Node, Amount));
121 assert(OtherW.TargetNode.isValid());
126 assert(W.Type == OtherW.Type);
127 assert(W.TargetNode == OtherW.TargetNode);
128 assert(OtherW.Amount &&
"Expected non-zero weight");
129 if (W.Amount > W.Amount + OtherW.Amount)
131 W.Amount = UINT64_MAX;
133 W.Amount += OtherW.Amount;
138 std::sort(Weights.begin(), Weights.end(),
140 const Weight &R) {
return L.TargetNode < R.TargetNode; });
143 WeightList::iterator O = Weights.begin();
144 for (WeightList::const_iterator
I = O, L = O,
E = Weights.end();
I !=
E;
149 for (++L; L !=
E && I->TargetNode == L->TargetNode; ++
L)
154 Weights.erase(O, Weights.end());
161 for (
const Weight &W : Weights)
165 if (Weights.size() == Combined.size())
170 Weights.reserve(Combined.size());
171 for (
const auto &
I : Combined)
172 Weights.push_back(
I.second);
177 if (Weights.size() > 128) {
190 return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
193 void Distribution::normalize() {
199 if (Weights.size() > 1)
203 if (Weights.size() == 1) {
205 Weights.front().Amount = 1;
216 else if (Total > UINT32_MAX)
223 assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
224 [](uint64_t Sum,
const Weight &W) {
225 return Sum + W.Amount;
227 "Expected total to be correct");
236 for (
Weight &W : Weights) {
239 assert(W.TargetNode.isValid());
241 assert(W.Amount <= UINT32_MAX);
246 assert(Total <= UINT32_MAX);
252 std::vector<FrequencyData>().
swap(Freqs);
253 std::vector<WorkingData>().
swap(Working);
261 std::vector<FrequencyData> SavedFreqs(std::move(BFI.
Freqs));
263 BFI.
Freqs = std::move(SavedFreqs);
274 auto isLoopHeader = [&OuterLoop](
const BlockNode &Node) {
275 return OuterLoop && OuterLoop->
isHeader(Node);
281 auto debugSuccessor = [&](
const char *
Type) {
283 <<
" [" << Type <<
"] weight = " << Weight;
284 if (!isLoopHeader(Resolved))
286 if (Resolved != Succ)
290 (void)debugSuccessor;
293 if (isLoopHeader(Resolved)) {
294 DEBUG(debugSuccessor(
"backedge"));
299 if (Working[Resolved.
Index].getContainingLoop() != OuterLoop) {
300 DEBUG(debugSuccessor(
" exit "));
301 Dist.
addExit(Resolved, Weight);
305 if (Resolved < Pred) {
306 if (!isLoopHeader(Pred)) {
309 "unhandled irreducible control flow");
312 DEBUG(debugSuccessor(
"abort!!!"));
320 "unhandled irreducible control flow");
323 DEBUG(debugSuccessor(
" local "));
331 for (
const auto &
I : Loop.
Exits)
332 if (!addToDist(Dist, OuterLoop, Loop.
getHeader(),
I.first,
343 DEBUG(
dbgs() <<
"compute-loop-scale: " << getLoopName(Loop) <<
"\n");
354 const Scaled64 InfiniteLoopScale(1, 12);
360 TotalBackedgeMass += Mass;
370 <<
" - " << TotalBackedgeMass <<
")\n"
371 <<
" - scale = " << Loop.
Scale <<
"\n");
376 DEBUG(
dbgs() <<
"packaging-loop: " << getLoopName(Loop) <<
"\n");
380 if (
auto *Loop = Working[M.
Index].getPackagedLoop())
389 const DitheringDistributer &
D,
const BlockNode &
T,
391 dbgs() <<
" => assign " << M <<
" (" << D.RemMass <<
")";
393 dbgs() <<
" [" << Desc <<
"]";
404 DEBUG(
dbgs() <<
" => mass: " << Mass <<
"\n");
407 DitheringDistributer
D(Dist, Mass);
419 assert(OuterLoop &&
"backedge or exit outside of loop");
422 if (W.
Type == Weight::Backedge) {
436 const Scaled64 &Min,
const Scaled64 &Max) {
443 const unsigned MaxBits = 64;
444 const unsigned SpreadBits = (Max / Min).lg();
445 Scaled64 ScalingFactor;
446 if (SpreadBits <= MaxBits - 3) {
449 ScalingFactor = Min.inverse();
455 ScalingFactor = Scaled64(1, MaxBits) / Max;
459 DEBUG(
dbgs() <<
"float-to-int: min = " << Min <<
", max = " << Max
460 <<
", factor = " << ScalingFactor <<
"\n");
461 for (
size_t Index = 0; Index < BFI.
Freqs.size(); ++Index) {
462 Scaled64 Scaled = BFI.
Freqs[Index].Scaled * ScalingFactor;
463 BFI.
Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
465 << BFI.
Freqs[Index].Scaled <<
", scaled = " << Scaled
466 <<
", int = " << BFI.
Freqs[Index].Integer <<
"\n");
476 <<
": mass = " << Loop.Mass <<
", scale = " << Loop.Scale
478 Loop.Scale *= Loop.Mass.toScaled();
479 Loop.IsPackaged =
false;
480 DEBUG(
dbgs() <<
" => combined-scale = " << Loop.Scale <<
"\n");
485 for (
const BlockNode &
N : Loop.Nodes) {
486 const auto &Working = BFI.
Working[N.Index];
487 Scaled64 &
F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
488 : BFI.
Freqs[N.Index].Scaled;
489 Scaled64 New = Loop.Scale *
F;
498 for (
size_t Index = 0; Index < Working.size(); ++Index)
499 Freqs[Index].Scaled = Working[Index].Mass.
toScaled();
508 auto Min = Scaled64::getLargest();
509 auto Max = Scaled64::getZero();
510 for (
size_t Index = 0; Index < Working.size(); ++Index) {
512 Min =
std::min(Min, Freqs[Index].Scaled);
513 Max = std::max(Max, Freqs[Index].Scaled);
530 return Freqs[Node.
Index].Integer;
536 return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
541 uint64_t Freq)
const {
546 APInt BlockCount(128, EntryCount.getValue());
547 APInt BlockFreq(128, Freq);
548 APInt EntryFreq(128, getEntryFreq());
549 BlockCount *= BlockFreq;
550 BlockCount = BlockCount.udiv(EntryFreq);
551 return BlockCount.getLimitedValue();
557 return Scaled64::getZero();
558 return Freqs[Node.
Index].Scaled;
564 assert(Node.
Index < Freqs.size() &&
"Expected legal index");
565 Freqs[Node.
Index].Integer = Freq;
570 return std::string();
581 return OS << getFloatingBlockFreq(Node);
590 return OS << Block / Entry;
596 for (
auto N : OuterLoop.
Nodes)
616 if (OuterLoop && OuterLoop->
isHeader(Succ))
622 Irr.
Edges.push_back(&SuccIrr);
623 SuccIrr.
Edges.push_front(&Irr);
647 const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
653 for (
const auto *
I : SCC)
657 auto &Irr = *
I->first;
658 for (
const auto *
P :
make_range(Irr.pred_begin(), Irr.pred_end())) {
664 Headers.push_back(Irr.Node);
669 assert(Headers.size() >= 2 &&
670 "Expected irreducible CFG; -loop-info is likely invalid");
671 if (Headers.size() == InSCC.
size()) {
673 std::sort(Headers.begin(), Headers.end());
678 for (
const auto &
I : InSCC) {
683 auto &Irr = *
I.first;
684 for (
const auto *
P :
make_range(Irr.pred_begin(), Irr.pred_end())) {
686 if (
P->Node < Irr.Node)
695 Headers.push_back(Irr.Node);
699 if (Headers.back() == Irr.Node)
704 Others.push_back(Irr.Node);
707 std::sort(Headers.begin(), Headers.end());
708 std::sort(Others.begin(), Others.end());
713 LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
714 const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
722 auto Loop = BFI.
Loops.emplace(Insert, OuterLoop, Headers.begin(),
723 Headers.end(), Others.begin(), Others.end());
726 for (
const auto &
N :
Loop->Nodes)
727 if (BFI.
Working[
N.Index].isLoopHeader())
736 std::list<LoopData>::iterator Insert) {
737 assert((OuterLoop ==
nullptr) == (Insert ==
Loops.begin()));
738 auto Prev = OuterLoop ? std::prev(Insert) :
Loops.end();
760 if (!Working[
I->Index].isPackaged())
777 DEBUG(
dbgs() <<
"adjust-loop-header-mass:\n");
779 auto &HeaderNode = Loop.
Nodes[
H];
781 DEBUG(
dbgs() <<
" - Add back edge mass for node "
782 <<
getBlockName(HeaderNode) <<
": " << BackedgeMass <<
"\n");
783 if (BackedgeMass.getMass() > 0)
784 Dist.
addLocal(HeaderNode, BackedgeMass.getMass());
786 DEBUG(
dbgs() <<
" Nothing added. Back edge mass is zero\n");
789 DitheringDistributer
D(Dist, LoopMass);
791 DEBUG(
dbgs() <<
" Distribute loop mass " << LoopMass
792 <<
" to headers using above weights\n");
void push_back(const T &Elt)
static void combineWeights(WeightList &Weights)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static void combineWeightsBySorting(WeightList &Weights)
bool IsPackaged
Whether this has been packaged.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
static char getHexDigit(int N)
MachineInstrBuilder MachineInstrBuilder &DefMI const MCInstrDesc & Desc
Stats about a block itself.
static NodeRef getEntryNode(const GraphT &G)
void addLocal(const BlockNode &Node, uint64_t Amount)
static ChildIteratorType child_end(NodeRef N)
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeight(Weight &W, const Weight &OtherW)
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
uint32_t NumHeaders
Number of headers.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
GraphT::IrrNode::iterator ChildIteratorType
raw_ostream & print(raw_ostream &OS) const
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
BlockFrequency getBlockFreq(const BlockNode &Node) const
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
std::vector< NodeAddr< NodeBase * > > NodeList
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Graph of irreducible control flow.
static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len)
This function adds the integer array x to the integer array Y and places the result in dest...
ScaledNumber inverse() const
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
void addBackedge(const BlockNode &Node, uint64_t Amount)
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node) const
static ChildIteratorType child_begin(NodeRef N)
const GraphT::IrrNode * NodeRef
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
The instances of the Type class are immutable: once they are created, they are never changed...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
std::string getLoopName(const LoopData &Loop) const
Distribution of unscaled probability weight.
void addNode(const BlockNode &Node)
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
bool isHeader(const BlockNode &Node) const
void clear()
Clear all memory.
void addNodesInFunction()
uint64_t NextPowerOf2(uint64_t A)
NextPowerOf2 - Returns the next power of two (in 64-bits) that is strictly greater than A...
iterator erase(const_iterator CI)
void addExit(const BlockNode &Node, uint64_t Amount)
iterator succ_begin() const
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
ExitMap Exits
Successor edges (and weights).
virtual std::string getBlockName(const BlockNode &Node) const
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode * > &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
static BlockMass getEmpty()
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode * > &SCC)
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Class for arbitrary precision integers.
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
bfi_detail::IrreducibleGraph GraphT
Unscaled probability weight.
static void combineWeightsByHashing(WeightList &Weights)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Represents a single loop in the control flow graph.
void packageLoop(LoopData &Loop)
Package up a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
static BlockMass getFull()
bool isIrreducible() const
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
NodeList Nodes
Header and the members of the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::vector< WorkingData > Working
Loop data: see initializeLoops().
std::vector< IrrNode > Nodes
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
std::deque< const IrrNode * > Edges
Base class for BlockFrequencyInfoImpl.
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
iterator succ_end() const
void unwrapLoops()
Unwrap loops.
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
BlockNode getHeader() const
Representative of a block.
void finalizeMetrics()
Finalize frequency metrics.
WeightList Weights
Individual successor weights.