LLVM 23.0.0git
BranchProbabilityInfo.cpp
Go to the documentation of this file.
1//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loops should be simplified before this analysis.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/STLExtras.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/LLVMContext.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Value.h"
38#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <map>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "branch-prob"
52
54 "print-bpi", cl::init(false), cl::Hidden,
55 cl::desc("Print the branch probability info."));
56
58 "print-bpi-func-name", cl::Hidden,
59 cl::desc("The option to specify the name of the function "
60 "whose branch probability info is printed."));
61
63 "Branch Probability Analysis", false, true)
69 "Branch Probability Analysis", false, true)
70
73
75
76// Weights are for internal use only. They are used by heuristics to help to
77// estimate edges' probability. Example:
78//
79// Using "Loop Branch Heuristics" we predict weights of edges for the
80// block BB2.
81// ...
82// |
83// V
84// BB1<-+
85// | |
86// | | (Weight = 124)
87// V |
88// BB2--+
89// |
90// | (Weight = 4)
91// V
92// BB3
93//
94// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
95// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
96static const uint32_t LBH_TAKEN_WEIGHT = 124;
98
99/// Unreachable-terminating branch taken probability.
100///
101/// This is the probability for a branch being taken to a block that terminates
102/// (eventually) in unreachable. These are predicted as unlikely as possible.
103/// All reachable probability will proportionally share the remaining part.
105
106/// Heuristics and lookup tables for non-loop branches:
107/// Pointer Heuristics (PH)
108static const uint32_t PH_TAKEN_WEIGHT = 20;
109static const uint32_t PH_NONTAKEN_WEIGHT = 12;
110static const BranchProbability
112static const BranchProbability
114
116using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;
117
118/// Pointer comparisons:
120 {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
121 {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
122};
123
124/// Zero Heuristics (ZH)
125static const uint32_t ZH_TAKEN_WEIGHT = 20;
126static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
127static const BranchProbability
129static const BranchProbability
131
132/// Integer compares with 0:
134 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
135 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
136 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
137 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
138};
139
140/// Integer compares with -1:
142 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
143 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
144 // InstCombine canonicalizes X >= 0 into X > -1
145 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
146};
147
148/// Integer compares with 1:
150 // InstCombine canonicalizes X <= 0 into X < 1
151 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
152};
153
154/// strcmp and similar functions return zero, negative, or positive, if the
155/// first string is equal, less, or greater than the second. We consider it
156/// likely that the strings are not equal, so a comparison with zero is
157/// probably false, but also a comparison with any other number is also
158/// probably false given that what exactly is returned for nonzero values is
159/// not specified. Any kind of comparison other than equality we know
160/// nothing about.
165
166// Floating-Point Heuristics (FPH)
167static const uint32_t FPH_TAKEN_WEIGHT = 20;
169
170/// This is the probability for an ordered floating point comparison.
171static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
172/// This is the probability for an unordered floating point comparison, it means
173/// one or two of the operands are NaN. Usually it is used to test for an
174/// exceptional case, so the result is unlikely.
175static const uint32_t FPH_UNO_WEIGHT = 1;
176
179static const BranchProbability
181static const BranchProbability
183static const BranchProbability
185
186/// Floating-Point compares:
188 {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
189 {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
190};
191
192/// Set of dedicated "absolute" execution weights for a block. These weights are
193/// meaningful relative to each other and their derivatives only.
194enum class BlockExecWeight : std::uint32_t {
195 /// Special weight used for cases with exact zero probability.
196 ZERO = 0x0,
197 /// Minimal possible non zero weight.
199 /// Weight to an 'unreachable' block.
201 /// Weight to a block containing non returning call.
203 /// Weight to 'unwind' block of an invoke instruction.
205 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
206 /// with attribute 'cold'.
207 COLD = 0xffff,
208 /// Default weight is used in cases when there is no dedicated execution
209 /// weight set. It is not propagated through the domination line either.
210 DEFAULT = 0xfffff
211};
212
214 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
215 // FIXME: We could only calculate this if the CFG is known to be irreducible
216 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
217 int SccNum = 0;
219 ++It, ++SccNum) {
220 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
221 // catch them.
222 const std::vector<const BasicBlock *> &Scc = *It;
223 if (Scc.size() == 1)
224 continue;
225
226 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
227 for (const auto *BB : Scc) {
228 LLVM_DEBUG(dbgs() << " " << BB->getName());
229 SccNums[BB] = SccNum;
230 calculateSccBlockType(BB, SccNum);
231 }
232 LLVM_DEBUG(dbgs() << "\n");
233 }
234}
235
237 auto SccIt = SccNums.find(BB);
238 if (SccIt == SccNums.end())
239 return -1;
240 return SccIt->second;
241}
242
244 int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const {
245
246 for (auto MapIt : SccBlocks[SccNum]) {
247 const auto *BB = MapIt.first;
248 if (isSCCHeader(BB, SccNum))
249 for (const auto *Pred : predecessors(BB))
250 if (getSCCNum(Pred) != SccNum)
251 Enters.push_back(const_cast<BasicBlock *>(BB));
252 }
253}
254
256 int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const {
257 for (auto MapIt : SccBlocks[SccNum]) {
258 const auto *BB = MapIt.first;
259 if (isSCCExitingBlock(BB, SccNum))
260 for (const auto *Succ : successors(BB))
261 if (getSCCNum(Succ) != SccNum)
262 Exits.push_back(const_cast<BasicBlock *>(Succ));
263 }
264}
265
266uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB,
267 int SccNum) const {
268 assert(getSCCNum(BB) == SccNum);
269
270 assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC");
271 const auto &SccBlockTypes = SccBlocks[SccNum];
272
273 auto It = SccBlockTypes.find(BB);
274 if (It != SccBlockTypes.end()) {
275 return It->second;
276 }
277 return Inner;
278}
279
280void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB,
281 int SccNum) {
282 assert(getSCCNum(BB) == SccNum);
283 uint32_t BlockType = Inner;
284
285 if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) {
286 // Consider any block that is an entry point to the SCC as
287 // a header.
288 return getSCCNum(Pred) != SccNum;
289 }))
290 BlockType |= Header;
291
292 if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) {
293 return getSCCNum(Succ) != SccNum;
294 }))
295 BlockType |= Exiting;
296
297 // Lazily compute the set of headers for a given SCC and cache the results
298 // in the SccHeaderMap.
299 if (SccBlocks.size() <= static_cast<unsigned>(SccNum))
300 SccBlocks.resize(SccNum + 1);
301 auto &SccBlockTypes = SccBlocks[SccNum];
302
303 if (BlockType != Inner) {
304 bool IsInserted;
305 std::tie(std::ignore, IsInserted) =
306 SccBlockTypes.insert(std::make_pair(BB, BlockType));
307 assert(IsInserted && "Duplicated block in SCC");
308 }
309}
310
311BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB,
312 const LoopInfo &LI,
313 const SccInfo &SccI)
314 : BB(BB) {
315 LD.first = LI.getLoopFor(BB);
316 if (!LD.first) {
317 LD.second = SccI.getSCCNum(BB);
318 }
319}
320
321bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const {
322 const auto &SrcBlock = Edge.first;
323 const auto &DstBlock = Edge.second;
324 return (DstBlock.getLoop() &&
325 !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
326 // Assume that SCCs can't be nested.
327 (DstBlock.getSccNum() != -1 &&
328 SrcBlock.getSccNum() != DstBlock.getSccNum());
329}
330
331bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const {
332 return isLoopEnteringEdge({Edge.second, Edge.first});
333}
334
335bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
336 const LoopEdge &Edge) const {
337 return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
338}
339
340bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const {
341 const auto &SrcBlock = Edge.first;
342 const auto &DstBlock = Edge.second;
343 return SrcBlock.belongsToSameLoop(DstBlock) &&
344 ((DstBlock.getLoop() &&
345 DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
346 (DstBlock.getSccNum() != -1 &&
347 SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
348}
349
350void BranchProbabilityInfo::getLoopEnterBlocks(
351 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const {
352 if (LB.getLoop()) {
353 auto *Header = LB.getLoop()->getHeader();
354 Enters.append(pred_begin(Header), pred_end(Header));
355 } else {
356 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
357 SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
358 }
359}
360
361void BranchProbabilityInfo::getLoopExitBlocks(
362 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const {
363 if (LB.getLoop()) {
364 LB.getLoop()->getExitBlocks(Exits);
365 } else {
366 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
367 SccI->getSccExitBlocks(LB.getSccNum(), Exits);
368 }
369}
370
371// Propagate existing explicit probabilities from either profile data or
372// 'expect' intrinsic processing. Examine metadata against unreachable
373// heuristic. The probability of the edge coming to unreachable block is
374// set to min of metadata and unreachable heuristic.
375bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
376 const Instruction *TI = BB->getTerminator();
377 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
378 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
380 return false;
381
382 MDNode *WeightsNode = getValidBranchWeightMDNode(*TI);
383 if (!WeightsNode)
384 return false;
385
386 // Check that the number of successors is manageable.
387 assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
388
389 // Build up the final weights that will be used in a temporary buffer.
390 // Compute the sum of all weights to later decide whether they need to
391 // be scaled to fit in 32 bits.
392 uint64_t WeightSum = 0;
394 SmallVector<unsigned, 2> UnreachableIdxs;
395 SmallVector<unsigned, 2> ReachableIdxs;
396
397 extractBranchWeights(WeightsNode, Weights);
398 for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
399 WeightSum += Weights[I];
400 const LoopBlock SrcLoopBB = getLoopBlock(BB);
401 const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I));
402 auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
403 if (EstimatedWeight &&
404 *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
405 UnreachableIdxs.push_back(I);
406 else
407 ReachableIdxs.push_back(I);
408 }
409 assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
410
411 // If the sum of weights does not fit in 32 bits, scale every weight down
412 // accordingly.
413 uint64_t ScalingFactor =
414 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
415
416 if (ScalingFactor > 1) {
417 WeightSum = 0;
418 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
419 Weights[I] /= ScalingFactor;
420 WeightSum += Weights[I];
421 }
422 }
423 assert(WeightSum <= UINT32_MAX &&
424 "Expected weights to scale down to 32 bits");
425
426 if (WeightSum == 0 || ReachableIdxs.size() == 0) {
427 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
428 Weights[I] = 1;
429 WeightSum = TI->getNumSuccessors();
430 }
431
432 // Set the probability.
434 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
435 BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) });
436
437 // Examine the metadata against unreachable heuristic.
438 // If the unreachable heuristic is more strong then we use it for this edge.
439 if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
440 setEdgeProbability(BB, BP);
441 return true;
442 }
443
444 auto UnreachableProb = UR_TAKEN_PROB;
445 for (auto I : UnreachableIdxs)
446 if (UnreachableProb < BP[I]) {
447 BP[I] = UnreachableProb;
448 }
449
450 // Sum of all edge probabilities must be 1.0. If we modified the probability
451 // of some edges then we must distribute the introduced difference over the
452 // reachable blocks.
453 //
454 // Proportional distribution: the relation between probabilities of the
455 // reachable edges is kept unchanged. That is for any reachable edges i and j:
456 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
457 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
458 // Where K is independent of i,j.
459 // newBP[i] == oldBP[i] * K
460 // We need to find K.
461 // Make sum of all reachables of the left and right parts:
462 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
463 // Sum of newBP must be equal to 1.0:
464 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
465 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
466 // Where sum_of_unreachable(newBP) is what has been just changed.
467 // Finally:
468 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
469 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
470 BranchProbability NewUnreachableSum = BranchProbability::getZero();
471 for (auto I : UnreachableIdxs)
472 NewUnreachableSum += BP[I];
473
474 BranchProbability NewReachableSum =
475 BranchProbability::getOne() - NewUnreachableSum;
476
477 BranchProbability OldReachableSum = BranchProbability::getZero();
478 for (auto I : ReachableIdxs)
479 OldReachableSum += BP[I];
480
481 if (OldReachableSum != NewReachableSum) { // Anything to dsitribute?
482 if (OldReachableSum.isZero()) {
483 // If all oldBP[i] are zeroes then the proportional distribution results
484 // in all zero probabilities and the error stays big. In this case we
485 // evenly spread NewReachableSum over the reachable edges.
486 BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
487 for (auto I : ReachableIdxs)
488 BP[I] = PerEdge;
489 } else {
490 for (auto I : ReachableIdxs) {
491 // We use uint64_t to avoid double rounding error of the following
492 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
493 // The formula is taken from the private constructor
494 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
495 uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
496 BP[I].getNumerator();
497 uint32_t Div = static_cast<uint32_t>(
498 divideNearest(Mul, OldReachableSum.getNumerator()));
499 BP[I] = BranchProbability::getRaw(Div);
500 }
501 }
502 }
503
504 setEdgeProbability(BB, BP);
505
506 return true;
507}
508
509// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
510// between two pointer or pointer and NULL will fail.
511bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
512 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
513 if (!BI || !BI->isConditional())
514 return false;
515
516 Value *Cond = BI->getCondition();
517 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
518 if (!CI || !CI->isEquality())
519 return false;
520
521 Value *LHS = CI->getOperand(0);
522
523 if (!LHS->getType()->isPointerTy())
524 return false;
525
526 assert(CI->getOperand(1)->getType()->isPointerTy());
527
528 auto Search = PointerTable.find(CI->getPredicate());
529 if (Search == PointerTable.end())
530 return false;
531 setEdgeProbability(BB, Search->second);
532 return true;
533}
534
535// Compute the unlikely successors to the block BB in the loop L, specifically
536// those that are unlikely because this is a loop, and add them to the
537// UnlikelyBlocks set.
538static void
540 SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
541 // Sometimes in a loop we have a branch whose condition is made false by
542 // taking it. This is typically something like
543 // int n = 0;
544 // while (...) {
545 // if (++n >= MAX) {
546 // n = 0;
547 // }
548 // }
549 // In this sort of situation taking the branch means that at the very least it
550 // won't be taken again in the next iteration of the loop, so we should
551 // consider it less likely than a typical branch.
552 //
553 // We detect this by looking back through the graph of PHI nodes that sets the
554 // value that the condition depends on, and seeing if we can reach a successor
555 // block which can be determined to make the condition false.
556 //
557 // FIXME: We currently consider unlikely blocks to be half as likely as other
558 // blocks, but if we consider the example above the likelyhood is actually
559 // 1/MAX. We could therefore be more precise in how unlikely we consider
560 // blocks to be, but it would require more careful examination of the form
561 // of the comparison expression.
563 if (!BI || !BI->isConditional())
564 return;
565
566 // Check if the branch is based on an instruction compared with a constant
568 if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
569 !isa<Constant>(CI->getOperand(1)))
570 return;
571
572 // Either the instruction must be a PHI, or a chain of operations involving
573 // constants that ends in a PHI which we can then collapse into a single value
574 // if the PHI value is known.
576 PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
577 Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
578 // Collect the instructions until we hit a PHI
580 while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
581 isa<Constant>(CmpLHS->getOperand(1))) {
582 // Stop if the chain extends outside of the loop
583 if (!L->contains(CmpLHS))
584 return;
585 InstChain.push_back(cast<BinaryOperator>(CmpLHS));
586 CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
587 if (CmpLHS)
588 CmpPHI = dyn_cast<PHINode>(CmpLHS);
589 }
590 if (!CmpPHI || !L->contains(CmpPHI))
591 return;
592
593 // Trace the phi node to find all values that come from successors of BB
594 SmallPtrSet<PHINode*, 8> VisitedInsts;
596 WorkList.push_back(CmpPHI);
597 VisitedInsts.insert(CmpPHI);
598 while (!WorkList.empty()) {
599 PHINode *P = WorkList.pop_back_val();
600 for (BasicBlock *B : P->blocks()) {
601 // Skip blocks that aren't part of the loop
602 if (!L->contains(B))
603 continue;
604 Value *V = P->getIncomingValueForBlock(B);
605 // If the source is a PHI add it to the work list if we haven't
606 // already visited it.
607 if (PHINode *PN = dyn_cast<PHINode>(V)) {
608 if (VisitedInsts.insert(PN).second)
609 WorkList.push_back(PN);
610 continue;
611 }
612 // If this incoming value is a constant and B is a successor of BB, then
613 // we can constant-evaluate the compare to see if it makes the branch be
614 // taken or not.
615 Constant *CmpLHSConst = dyn_cast<Constant>(V);
616 if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
617 continue;
618 // First collapse InstChain
619 const DataLayout &DL = BB->getDataLayout();
620 for (Instruction *I : llvm::reverse(InstChain)) {
621 CmpLHSConst = ConstantFoldBinaryOpOperands(
622 I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
623 if (!CmpLHSConst)
624 break;
625 }
626 if (!CmpLHSConst)
627 continue;
628 // Now constant-evaluate the compare
630 CI->getPredicate(), CmpLHSConst, CmpConst, DL);
631 // If the result means we don't branch to the block then that block is
632 // unlikely.
633 if (Result && ((Result->isNullValue() && B == BI->getSuccessor(0)) ||
634 (Result->isOneValue() && B == BI->getSuccessor(1))))
635 UnlikelyBlocks.insert(B);
636 }
637 }
638}
639
640std::optional<uint32_t>
641BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
642 auto WeightIt = EstimatedBlockWeight.find(BB);
643 if (WeightIt == EstimatedBlockWeight.end())
644 return std::nullopt;
645 return WeightIt->second;
646}
647
648std::optional<uint32_t>
649BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
650 auto WeightIt = EstimatedLoopWeight.find(L);
651 if (WeightIt == EstimatedLoopWeight.end())
652 return std::nullopt;
653 return WeightIt->second;
654}
655
656std::optional<uint32_t>
657BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
658 // For edges entering a loop take weight of a loop rather than an individual
659 // block in the loop.
660 return isLoopEnteringEdge(Edge)
661 ? getEstimatedLoopWeight(Edge.second.getLoopData())
662 : getEstimatedBlockWeight(Edge.second.getBlock());
663}
664
665template <class IterT>
666std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
667 const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
668 std::optional<uint32_t> MaxWeight;
669 for (const BasicBlock *DstBB : Successors) {
670 const LoopBlock DstLoopBB = getLoopBlock(DstBB);
671 auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
672
673 if (!Weight)
674 return std::nullopt;
675
676 if (!MaxWeight || *MaxWeight < *Weight)
677 MaxWeight = Weight;
678 }
679
680 return MaxWeight;
681}
682
683// Updates \p LoopBB's weight and returns true. If \p LoopBB has already
684// an associated weight it is unchanged and false is returned.
685//
686// Please note by the algorithm the weight is not expected to change once set
687// thus 'false' status is used to track visited blocks.
688bool BranchProbabilityInfo::updateEstimatedBlockWeight(
689 LoopBlock &LoopBB, uint32_t BBWeight,
690 SmallVectorImpl<BasicBlock *> &BlockWorkList,
691 SmallVectorImpl<LoopBlock> &LoopWorkList) {
692 BasicBlock *BB = LoopBB.getBlock();
693
694 // In general, weight is assigned to a block when it has final value and
695 // can't/shouldn't be changed. However, there are cases when a block
696 // inherently has several (possibly "contradicting") weights. For example,
697 // "unwind" block may also contain "cold" call. In that case the first
698 // set weight is favored and all consequent weights are ignored.
699 if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
700 return false;
701
702 for (BasicBlock *PredBlock : predecessors(BB)) {
703 LoopBlock PredLoop = getLoopBlock(PredBlock);
704 // Add affected block/loop to a working list.
705 if (isLoopExitingEdge({PredLoop, LoopBB})) {
706 if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
707 LoopWorkList.push_back(PredLoop);
708 } else if (!EstimatedBlockWeight.count(PredBlock))
709 BlockWorkList.push_back(PredBlock);
710 }
711 return true;
712}
713
714// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
715// to all such blocks that are post dominated by \BB. In other words to all
716// blocks that the one is executed if and only if another one is executed.
717// Importantly, we skip loops here for two reasons. First weights of blocks in
718// a loop should be scaled by trip count (yet possibly unknown). Second there is
719// no any value in doing that because that doesn't give any additional
720// information regarding distribution of probabilities inside the loop.
721// Exception is loop 'enter' and 'exit' edges that are handled in a special way
722// at calcEstimatedHeuristics.
723//
724// In addition, \p WorkList is populated with basic blocks if at leas one
725// successor has updated estimated weight.
726void BranchProbabilityInfo::propagateEstimatedBlockWeight(
727 const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
728 uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
729 SmallVectorImpl<LoopBlock> &LoopWorkList) {
730 const BasicBlock *BB = LoopBB.getBlock();
731 const auto *DTStartNode = DT->getNode(BB);
732 const auto *PDTStartNode = PDT->getNode(BB);
733
734 // TODO: Consider propagating weight down the domination line as well.
735 for (const auto *DTNode = DTStartNode; DTNode != nullptr;
736 DTNode = DTNode->getIDom()) {
737 auto *DomBB = DTNode->getBlock();
738 // Consider blocks which lie on one 'line'.
739 if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
740 // If BB doesn't post dominate DomBB it will not post dominate dominators
741 // of DomBB as well.
742 break;
743
744 LoopBlock DomLoopBB = getLoopBlock(DomBB);
745 const LoopEdge Edge{DomLoopBB, LoopBB};
746 // Don't propagate weight to blocks belonging to different loops.
747 if (!isLoopEnteringExitingEdge(Edge)) {
748 if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
749 LoopWorkList))
750 // If DomBB has weight set then all it's predecessors are already
751 // processed (since we propagate weight up to the top of IR each time).
752 break;
753 } else if (isLoopExitingEdge(Edge)) {
754 LoopWorkList.push_back(DomLoopBB);
755 }
756 }
757}
758
759std::optional<uint32_t>
760BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) {
761 // Returns true if \p BB has call marked with "NoReturn" attribute.
762 auto hasNoReturn = [&](const BasicBlock *BB) {
763 for (const auto &I : reverse(*BB))
764 if (const CallInst *CI = dyn_cast<CallInst>(&I))
765 if (CI->hasFnAttr(Attribute::NoReturn))
766 return true;
767
768 return false;
769 };
770
771 // Important note regarding the order of checks. They are ordered by weight
772 // from lowest to highest. Doing that allows to avoid "unstable" results
773 // when several conditions heuristics can be applied simultaneously.
775 // If this block is terminated by a call to
776 // @llvm.experimental.deoptimize then treat it like an unreachable
777 // since it is expected to practically never execute.
778 // TODO: Should we actually treat as never returning call?
780 return hasNoReturn(BB)
781 ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
782 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
783
784 // Check if the block is an exception handling block.
785 if (BB->isEHPad())
786 return static_cast<uint32_t>(BlockExecWeight::UNWIND);
787
788 // Check if the block contains 'cold' call.
789 for (const auto &I : *BB)
790 if (const CallInst *CI = dyn_cast<CallInst>(&I))
791 if (CI->hasFnAttr(Attribute::Cold))
792 return static_cast<uint32_t>(BlockExecWeight::COLD);
793
794 return std::nullopt;
795}
796
797// Does RPO traversal over all blocks in \p F and assigns weights to
798// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
799// best to propagate the weight to up/down the IR.
800void BranchProbabilityInfo::estimateBlockWeights(const Function &F,
801 DominatorTree *DT,
802 PostDominatorTree *PDT) {
803 SmallVector<BasicBlock *, 8> BlockWorkList;
804 SmallVector<LoopBlock, 8> LoopWorkList;
805 SmallDenseMap<LoopData, SmallVector<BasicBlock *, 4>> LoopExitBlocks;
806
807 // By doing RPO we make sure that all predecessors already have weights
808 // calculated before visiting theirs successors.
809 ReversePostOrderTraversal<const Function *> RPOT(&F);
810 for (const auto *BB : RPOT)
811 if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
812 // If we were able to find estimated weight for the block set it to this
813 // block and propagate up the IR.
814 propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight,
815 BlockWorkList, LoopWorkList);
816
817 // BlockWorklist/LoopWorkList contains blocks/loops with at least one
818 // successor/exit having estimated weight. Try to propagate weight to such
819 // blocks/loops from successors/exits.
820 // Process loops and blocks. Order is not important.
821 do {
822 while (!LoopWorkList.empty()) {
823 const LoopBlock LoopBB = LoopWorkList.pop_back_val();
824 const LoopData LD = LoopBB.getLoopData();
825 if (EstimatedLoopWeight.count(LD))
826 continue;
827
828 auto Res = LoopExitBlocks.try_emplace(LD);
829 SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
830 if (Res.second)
831 getLoopExitBlocks(LoopBB, Exits);
832 auto LoopWeight = getMaxEstimatedEdgeWeight(
833 LoopBB, make_range(Exits.begin(), Exits.end()));
834
835 if (LoopWeight) {
836 // If we never exit the loop then we can enter it once at maximum.
837 if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
838 LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
839
840 EstimatedLoopWeight.insert({LD, *LoopWeight});
841 // Add all blocks entering the loop into working list.
842 getLoopEnterBlocks(LoopBB, BlockWorkList);
843 }
844 }
845
846 while (!BlockWorkList.empty()) {
847 // We can reach here only if BlockWorkList is not empty.
848 const BasicBlock *BB = BlockWorkList.pop_back_val();
849 if (EstimatedBlockWeight.count(BB))
850 continue;
851
852 // We take maximum over all weights of successors. In other words we take
853 // weight of "hot" path. In theory we can probably find a better function
854 // which gives higher accuracy results (comparing to "maximum") but I
855 // can't
856 // think of any right now. And I doubt it will make any difference in
857 // practice.
858 const LoopBlock LoopBB = getLoopBlock(BB);
859 auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
860
861 if (MaxWeight)
862 propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
863 BlockWorkList, LoopWorkList);
864 }
865 } while (!BlockWorkList.empty() || !LoopWorkList.empty());
866}
867
868// Calculate edge probabilities based on block's estimated weight.
869// Note that gathered weights were not scaled for loops. Thus edges entering
870// and exiting loops requires special processing.
871bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
873 "expected more than one successor!");
874
875 const LoopBlock LoopBB = getLoopBlock(BB);
876
877 SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks;
879 if (LoopBB.getLoop())
880 computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);
881
882 // Changed to 'true' if at least one successor has estimated weight.
883 bool FoundEstimatedWeight = false;
884 SmallVector<uint32_t, 4> SuccWeights;
885 uint64_t TotalWeight = 0;
886 // Go over all successors of BB and put their weights into SuccWeights.
887 for (const BasicBlock *SuccBB : successors(BB)) {
888 std::optional<uint32_t> Weight;
889 const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
890 const LoopEdge Edge{LoopBB, SuccLoopBB};
891
892 Weight = getEstimatedEdgeWeight(Edge);
893
894 if (isLoopExitingEdge(Edge) &&
895 // Avoid adjustment of ZERO weight since it should remain unchanged.
896 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
897 // Scale down loop exiting weight by trip count.
898 Weight = std::max(
899 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
900 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
901 TC);
902 }
903 bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
904 if (IsUnlikelyEdge &&
905 // Avoid adjustment of ZERO weight since it should remain unchanged.
906 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
907 // 'Unlikely' blocks have twice lower weight.
908 Weight = std::max(
909 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
910 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
911 }
912
913 if (Weight)
914 FoundEstimatedWeight = true;
915
916 auto WeightVal =
917 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
918 TotalWeight += WeightVal;
919 SuccWeights.push_back(WeightVal);
920 }
921
922 // If non of blocks have estimated weight bail out.
923 // If TotalWeight is 0 that means weight of each successor is 0 as well and
924 // equally likely. Bail out early to not deal with devision by zero.
925 if (!FoundEstimatedWeight || TotalWeight == 0)
926 return false;
927
928 assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
929 const unsigned SuccCount = SuccWeights.size();
930
931 // If the sum of weights does not fit in 32 bits, scale every weight down
932 // accordingly.
933 if (TotalWeight > UINT32_MAX) {
934 uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
935 TotalWeight = 0;
936 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
937 SuccWeights[Idx] /= ScalingFactor;
938 if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
939 SuccWeights[Idx] =
940 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
941 TotalWeight += SuccWeights[Idx];
942 }
943 assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
944 }
945
946 // Finally set probabilities to edges according to estimated block weights.
947 SmallVector<BranchProbability, 4> EdgeProbabilities(
948 SuccCount, BranchProbability::getUnknown());
949
950 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
951 EdgeProbabilities[Idx] =
952 BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
953 }
954 setEdgeProbability(BB, EdgeProbabilities);
955 return true;
956}
957
958bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
959 const TargetLibraryInfo *TLI) {
960 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
961 if (!BI || !BI->isConditional())
962 return false;
963
964 Value *Cond = BI->getCondition();
965 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
966 if (!CI)
967 return false;
968
969 auto GetConstantInt = [](Value *V) {
970 if (auto *I = dyn_cast<BitCastInst>(V))
971 return dyn_cast<ConstantInt>(I->getOperand(0));
972 return dyn_cast<ConstantInt>(V);
973 };
974
975 Value *RHS = CI->getOperand(1);
976 ConstantInt *CV = GetConstantInt(RHS);
977 if (!CV)
978 return false;
979
980 // If the LHS is the result of AND'ing a value with a single bit bitmask,
981 // we don't have information about probabilities.
982 if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
983 if (LHS->getOpcode() == Instruction::And)
984 if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1)))
985 if (AndRHS->getValue().isPowerOf2())
986 return false;
987
988 // Check if the LHS is the return value of a library function
989 LibFunc Func = LibFunc::NotLibFunc;
990 if (TLI)
991 if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
992 if (Function *CalledFn = Call->getCalledFunction())
993 TLI->getLibFunc(*CalledFn, Func);
994
995 ProbabilityTable::const_iterator Search;
996 if (Func == LibFunc_strcasecmp ||
997 Func == LibFunc_strcmp ||
998 Func == LibFunc_strncasecmp ||
999 Func == LibFunc_strncmp ||
1000 Func == LibFunc_memcmp ||
1001 Func == LibFunc_bcmp) {
1002 Search = ICmpWithLibCallTable.find(CI->getPredicate());
1003 if (Search == ICmpWithLibCallTable.end())
1004 return false;
1005 } else if (CV->isZero()) {
1006 Search = ICmpWithZeroTable.find(CI->getPredicate());
1007 if (Search == ICmpWithZeroTable.end())
1008 return false;
1009 } else if (CV->isOne()) {
1010 Search = ICmpWithOneTable.find(CI->getPredicate());
1011 if (Search == ICmpWithOneTable.end())
1012 return false;
1013 } else if (CV->isMinusOne()) {
1014 Search = ICmpWithMinusOneTable.find(CI->getPredicate());
1015 if (Search == ICmpWithMinusOneTable.end())
1016 return false;
1017 } else {
1018 return false;
1019 }
1020
1021 setEdgeProbability(BB, Search->second);
1022 return true;
1023}
1024
1025bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
1026 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
1027 if (!BI || !BI->isConditional())
1028 return false;
1029
1030 Value *Cond = BI->getCondition();
1031 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
1032 if (!FCmp)
1033 return false;
1034
1035 ProbabilityList ProbList;
1036 if (FCmp->isEquality()) {
1037 ProbList = !FCmp->isTrueWhenEqual() ?
1038 // f1 == f2 -> Unlikely
1040 // f1 != f2 -> Likely
1042 } else {
1043 auto Search = FCmpTable.find(FCmp->getPredicate());
1044 if (Search == FCmpTable.end())
1045 return false;
1046 ProbList = Search->second;
1047 }
1048
1049 setEdgeProbability(BB, ProbList);
1050 return true;
1051}
1052
1054 Probs.clear();
1055 Handles.clear();
1056}
1057
1059 FunctionAnalysisManager::Invalidator &) {
1060 // Check whether the analysis, all analyses on functions, or the function's
1061 // CFG have been preserved.
1062 auto PAC = PA.getChecker<BranchProbabilityAnalysis>();
1063 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
1064 PAC.preservedSet<CFGAnalyses>());
1065}
1066
1068 OS << "---- Branch Probabilities ----\n";
1069 // We print the probabilities from the last function the analysis ran over,
1070 // or the function it is currently running over.
1071 assert(LastF && "Cannot print prior to running over a function");
1072 for (const auto &BI : *LastF) {
1073 for (const BasicBlock *Succ : successors(&BI))
1074 printEdgeProbability(OS << " ", &BI, Succ);
1075 }
1076}
1077
1079isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
1080 // Hot probability is at least 4/5 = 80%
1081 // FIXME: Compare against a static "hot" BranchProbability.
1082 return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
1083}
1084
1085/// Get the raw edge probability for the edge. If can't find it, return a
1086/// default probability 1/N where N is the number of successors. Here an edge is
1087/// specified using PredBlock and an
1088/// index to the successors.
1091 unsigned IndexInSuccessors) const {
1092 auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
1093 assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
1094 (Probs.end() == I) &&
1095 "Probability for I-th successor must always be defined along with the "
1096 "probability for the first successor");
1097
1098 if (I != Probs.end())
1099 return I->second;
1100
1101 return {1, static_cast<uint32_t>(succ_size(Src))};
1102}
1103
1106 const_succ_iterator Dst) const {
1107 return getEdgeProbability(Src, Dst.getSuccessorIndex());
1108}
1109
1110/// Get the raw edge probability calculated for the block pair. This returns the
1111/// sum of all raw edge probabilities from Src to Dst.
1114 const BasicBlock *Dst) const {
1115 if (!Probs.count(std::make_pair(Src, 0)))
1116 return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src));
1117
1118 auto Prob = BranchProbability::getZero();
1119 for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
1120 if (*I == Dst)
1121 Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second;
1122
1123 return Prob;
1124}
1125
1126/// Set the edge probability for all edges at once.
1128 const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
1129 assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
1130 eraseBlock(Src); // Erase stale data if any.
1131 if (Probs.size() == 0)
1132 return; // Nothing to set.
1133
1134 Handles.insert(BasicBlockCallbackVH(Src, this));
1135 uint64_t TotalNumerator = 0;
1136 for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
1137 this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
1138 LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
1139 << " successor probability to " << Probs[SuccIdx]
1140 << "\n");
1141 TotalNumerator += Probs[SuccIdx].getNumerator();
1142 }
1143
1144 // Because of rounding errors the total probability cannot be checked to be
1145 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1146 // Instead, every single probability in Probs must be as accurate as possible.
1147 // This results in error 1/denominator at most, thus the total absolute error
1148 // should be within Probs.size / BranchProbability::getDenominator.
1149 assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
1150 assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
1151 (void)TotalNumerator;
1152}
1153
1155 BasicBlock *Dst) {
1156 eraseBlock(Dst); // Erase stale data if any.
1157 unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
1158 assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
1159 if (NumSuccessors == 0)
1160 return; // Nothing to set.
1161 if (!this->Probs.contains(std::make_pair(Src, 0)))
1162 return; // No probability is set for edges from Src. Keep the same for Dst.
1163
1164 Handles.insert(BasicBlockCallbackVH(Dst, this));
1165 for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
1166 auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
1167 this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
1168 LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx
1169 << " successor probability to " << Prob << "\n");
1170 }
1171}
1172
1174 assert(Src->getTerminator()->getNumSuccessors() == 2);
1175 auto It0 = Probs.find(std::make_pair(Src, 0));
1176 if (It0 == Probs.end())
1177 return; // No probability is set for edges from Src
1178 auto It1 = Probs.find(std::make_pair(Src, 1));
1179 assert(It1 != Probs.end());
1180 std::swap(It0->second, It1->second);
1181}
1182
1185 const BasicBlock *Src,
1186 const BasicBlock *Dst) const {
1187 const BranchProbability Prob = getEdgeProbability(Src, Dst);
1188 OS << "edge ";
1189 Src->printAsOperand(OS, false, Src->getModule());
1190 OS << " -> ";
1191 Dst->printAsOperand(OS, false, Dst->getModule());
1192 OS << " probability is " << Prob
1193 << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
1194
1195 return OS;
1196}
1197
1199 LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");
1200
1201 // Note that we cannot use successors of BB because the terminator of BB may
1202 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
1203 // Instead we remove prob data for the block by iterating successors by their
1204 // indices from 0 till the last which exists. There could not be prob data for
1205 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
1206 // set for all successors from 0 to M at once by the method
1207 // setEdgeProbability().
1208 Handles.erase(BasicBlockCallbackVH(BB, this));
1209 for (unsigned I = 0;; ++I) {
1210 auto MapI = Probs.find(std::make_pair(BB, I));
1211 if (MapI == Probs.end()) {
1212 assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
1213 "Must be no more successors");
1214 return;
1215 }
1216 Probs.erase(MapI);
1217 }
1218}
1219
1221 const TargetLibraryInfo *TLI,
1222 DominatorTree *DT,
1223 PostDominatorTree *PDT) {
1224 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
1225 << " ----\n\n");
1226 LastF = &F; // Store the last function we ran on for printing.
1227 LI = &LoopI;
1228
1229 SccI = std::make_unique<SccInfo>(F);
1230
1231 assert(EstimatedBlockWeight.empty());
1232 assert(EstimatedLoopWeight.empty());
1233
1234 std::unique_ptr<DominatorTree> DTPtr;
1235 std::unique_ptr<PostDominatorTree> PDTPtr;
1236
1237 if (!DT) {
1238 DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
1239 DT = DTPtr.get();
1240 }
1241
1242 if (!PDT) {
1243 PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
1244 PDT = PDTPtr.get();
1245 }
1246
1247 estimateBlockWeights(F, DT, PDT);
1248
1249 // Walk the basic blocks in post-order so that we can build up state about
1250 // the successors of a block iteratively.
1251 for (const auto *BB : post_order(&F.getEntryBlock())) {
1252 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
1253 << "\n");
1254 // If there is no at least two successors, no sense to set probability.
1255 if (BB->getTerminator()->getNumSuccessors() < 2)
1256 continue;
1257 if (calcMetadataWeights(BB))
1258 continue;
1259 if (calcEstimatedHeuristics(BB))
1260 continue;
1261 if (calcPointerHeuristics(BB))
1262 continue;
1263 if (calcZeroHeuristics(BB, TLI))
1264 continue;
1265 if (calcFloatingPointHeuristics(BB))
1266 continue;
1267 }
1268
1269 EstimatedLoopWeight.clear();
1270 EstimatedBlockWeight.clear();
1271 SccI.reset();
1272
1273 if (PrintBranchProb && (PrintBranchProbFuncName.empty() ||
1274 F.getName() == PrintBranchProbFuncName)) {
1275 print(dbgs());
1276 }
1277}
1278
1280 AnalysisUsage &AU) const {
1281 // We require DT so it's available when LI is available. The LI updating code
1282 // asserts that DT is also present so if we don't make sure that we have DT
1283 // here, that assert will trigger.
1289 AU.setPreservesAll();
1290}
1291
1293 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1294 const TargetLibraryInfo &TLI =
1297 PostDominatorTree &PDT =
1299 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1300 return false;
1301}
1302
1304
1306 const Module *) const {
1307 BPI.print(OS);
1308}
1309
1310AnalysisKey BranchProbabilityAnalysis::Key;
1313 auto &LI = AM.getResult<LoopAnalysis>(F);
1314 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1315 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1316 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1318 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1319 return BPI;
1320}
1321
1324 OS << "Printing analysis 'Branch Probability Analysis' for function '"
1325 << F.getName() << "':\n";
1327 return PreservedAnalyses::all();
1328}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
BlockExecWeight
Set of dedicated "absolute" execution weights for a block.
@ NORETURN
Weight to a block containing non returning call.
@ UNWIND
Weight to 'unwind' block of an invoke instruction.
@ COLD
Weight to a 'cold' block.
@ ZERO
Special weight used for cases with exact zero probability.
@ UNREACHABLE
Weight to an 'unreachable' block.
@ DEFAULT
Default weight is used in cases when there is no dedicated execution weight set.
@ LOWEST_NON_ZERO
Minimal possible non zero weight.
static const uint32_t FPH_TAKEN_WEIGHT
static const BranchProbability FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const uint32_t LBH_TAKEN_WEIGHT
static const uint32_t ZH_NONTAKEN_WEIGHT
static const ProbabilityTable ICmpWithLibCallTable
strcmp and similar functions return zero, negative, or positive, if the first string is equal,...
static const ProbabilityTable ICmpWithMinusOneTable
Integer compares with -1:
static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const ProbabilityTable ICmpWithZeroTable
Integer compares with 0:
static const uint32_t PH_NONTAKEN_WEIGHT
std::map< CmpInst::Predicate, ProbabilityList > ProbabilityTable
static const BranchProbability PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const BranchProbability FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const uint32_t PH_TAKEN_WEIGHT
Heuristics and lookup tables for non-loop branches: Pointer Heuristics (PH)
static const BranchProbability FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
static const BranchProbability PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
SmallVector< BranchProbability > ProbabilityList
static const ProbabilityTable ICmpWithOneTable
Integer compares with 1:
static const uint32_t ZH_TAKEN_WEIGHT
Zero Heuristics (ZH)
static const uint32_t FPH_NONTAKEN_WEIGHT
static const BranchProbability UR_TAKEN_PROB
Unreachable-terminating branch taken probability.
static const ProbabilityTable FCmpTable
Floating-Point compares:
static const uint32_t LBH_NONTAKEN_WEIGHT
static const ProbabilityTable PointerTable
Pointer comparisons:
static const uint32_t FPH_ORD_WEIGHT
This is the probability for an ordered floating point comparison.
static void computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, SmallPtrSetImpl< const BasicBlock * > &UnlikelyBlocks)
static const uint32_t FPH_UNO_WEIGHT
This is the probability for an unordered floating point comparison, it means one or two of the operan...
static cl::opt< std::string > PrintBranchProbFuncName("print-bpi-func-name", cl::Hidden, cl::desc("The option to specify the name of the function " "whose branch probability info is printed."))
static cl::opt< bool > PrintBranchProb("print-bpi", cl::init(false), cl::Hidden, cl::desc("Print the branch probability info."))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file contains the declarations for metadata subclasses.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition BasicBlock.h:713
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
LLVM_ABI BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM)
Run the analysis pass over a function and produce BPI.
Legacy analysis pass which computes BranchProbabilityInfo.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &OS, const Module *M=nullptr) const override
print - Print out the internal state of the pass.
bool isSCCHeader(const BasicBlock *BB, int SccNum) const
Returns true if BB is a 'header' block in SCC with SccNum ID, false otherwise.
LLVM_ABI void getSccEnterBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Enters) const
Fills in Enters vector with all such blocks that don't belong to SCC with SccNum ID but there is an e...
bool isSCCExitingBlock(const BasicBlock *BB, int SccNum) const
Returns true if BB is an 'exiting' block in SCC with SccNum ID, false otherwise.
LLVM_ABI void getSccExitBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Exits) const
Fills in Exits vector with all such blocks that don't belong to SCC with SccNum ID but there is an ed...
LLVM_ABI int getSCCNum(const BasicBlock *BB) const
If BB belongs to some SCC then ID of that SCC is returned, otherwise -1 is returned.
Analysis providing branch probability information.
LLVM_ABI void eraseBlock(const BasicBlock *BB)
Forget analysis results for the given basic block.
LLVM_ABI void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
LLVM_ABI bool invalidate(Function &, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &)
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
LLVM_ABI void calculate(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI, DominatorTree *DT, PostDominatorTree *PDT)
LLVM_ABI bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const
Test if an edge is hot relative to other out-edges of the Src.
LLVM_ABI void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
LLVM_ABI void print(raw_ostream &OS) const
LLVM_ABI raw_ostream & printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const
Print an edge's probability.
LLVM_ABI void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static uint32_t getDenominator()
static BranchProbability getRaw(uint32_t N)
static BranchProbability getOne()
static BranchProbability getUnknown()
uint32_t getNumerator() const
static BranchProbability getZero()
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isTrueWhenEqual() const
This is just a convenience.
Definition InstrTypes.h:942
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static bool isEquality(Predicate Pred)
FunctionPass(char &pid)
Definition Pass.h:316
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition SCCIterator.h:49
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
CallInst * Call
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto pred_end(const MachineBasicBlock *BB)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< po_iterator< T > > post_order(const T &G)
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition MathExtras.h:458
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI MDNode * getValidBranchWeightMDNode(const Instruction &I)
Get the valid branch weights metadata node.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Mul
Product of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition CFG.h:245
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29