LLVM 20.0.0git
BranchProbabilityInfo.cpp
Go to the documentation of this file.
1//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loops should be simplified before this analysis.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/STLExtras.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/LLVMContext.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Value.h"
38#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <map>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "branch-prob"
52
54 "print-bpi", cl::init(false), cl::Hidden,
55 cl::desc("Print the branch probability info."));
56
58 "print-bpi-func-name", cl::Hidden,
59 cl::desc("The option to specify the name of the function "
60 "whose branch probability info is printed."));
61
63 "Branch Probability Analysis", false, true)
69 "Branch Probability Analysis", false, true)
70
72 : FunctionPass(ID) {
75}
76
78
79// Weights are for internal use only. They are used by heuristics to help to
80// estimate edges' probability. Example:
81//
82// Using "Loop Branch Heuristics" we predict weights of edges for the
83// block BB2.
84// ...
85// |
86// V
87// BB1<-+
88// | |
89// | | (Weight = 124)
90// V |
91// BB2--+
92// |
93// | (Weight = 4)
94// V
95// BB3
96//
97// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
98// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
99static const uint32_t LBH_TAKEN_WEIGHT = 124;
101
102/// Unreachable-terminating branch taken probability.
103///
104/// This is the probability for a branch being taken to a block that terminates
105/// (eventually) in unreachable. These are predicted as unlikely as possible.
106/// All reachable probability will proportionally share the remaining part.
108
109/// Heuristics and lookup tables for non-loop branches:
110/// Pointer Heuristics (PH)
111static const uint32_t PH_TAKEN_WEIGHT = 20;
112static const uint32_t PH_NONTAKEN_WEIGHT = 12;
113static const BranchProbability
115static const BranchProbability
117
119using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;
120
121/// Pointer comparisons:
123 {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
124 {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
125};
126
127/// Zero Heuristics (ZH)
128static const uint32_t ZH_TAKEN_WEIGHT = 20;
129static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
130static const BranchProbability
132static const BranchProbability
134
135/// Integer compares with 0:
137 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
138 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
139 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
140 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
141};
142
143/// Integer compares with -1:
145 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
146 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
147 // InstCombine canonicalizes X >= 0 into X > -1
148 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
149};
150
151/// Integer compares with 1:
153 // InstCombine canonicalizes X <= 0 into X < 1
154 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
155};
156
157/// strcmp and similar functions return zero, negative, or positive, if the
158/// first string is equal, less, or greater than the second. We consider it
159/// likely that the strings are not equal, so a comparison with zero is
160/// probably false, but also a comparison with any other number is also
161/// probably false given that what exactly is returned for nonzero values is
162/// not specified. Any kind of comparison other than equality we know
163/// nothing about.
167};
168
169// Floating-Point Heuristics (FPH)
170static const uint32_t FPH_TAKEN_WEIGHT = 20;
172
173/// This is the probability for an ordered floating point comparison.
174static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
175/// This is the probability for an unordered floating point comparison, it means
176/// one or two of the operands are NaN. Usually it is used to test for an
177/// exceptional case, so the result is unlikely.
178static const uint32_t FPH_UNO_WEIGHT = 1;
179
182static const BranchProbability
184static const BranchProbability
186static const BranchProbability
188
189/// Floating-Point compares:
191 {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
192 {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
193};
194
195/// Set of dedicated "absolute" execution weights for a block. These weights are
196/// meaningful relative to each other and their derivatives only.
197enum class BlockExecWeight : std::uint32_t {
198 /// Special weight used for cases with exact zero probability.
199 ZERO = 0x0,
200 /// Minimal possible non zero weight.
201 LOWEST_NON_ZERO = 0x1,
202 /// Weight to an 'unreachable' block.
204 /// Weight to a block containing non returning call.
206 /// Weight to 'unwind' block of an invoke instruction.
208 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
209 /// with attribute 'cold'.
210 COLD = 0xffff,
211 /// Default weight is used in cases when there is no dedicated execution
212 /// weight set. It is not propagated through the domination line either.
213 DEFAULT = 0xfffff
214};
215
217 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
218 // FIXME: We could only calculate this if the CFG is known to be irreducible
219 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
220 int SccNum = 0;
221 for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
222 ++It, ++SccNum) {
223 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
224 // catch them.
225 const std::vector<const BasicBlock *> &Scc = *It;
226 if (Scc.size() == 1)
227 continue;
228
229 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
230 for (const auto *BB : Scc) {
231 LLVM_DEBUG(dbgs() << " " << BB->getName());
232 SccNums[BB] = SccNum;
233 calculateSccBlockType(BB, SccNum);
234 }
235 LLVM_DEBUG(dbgs() << "\n");
236 }
237}
238
240 auto SccIt = SccNums.find(BB);
241 if (SccIt == SccNums.end())
242 return -1;
243 return SccIt->second;
244}
245
247 int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const {
248
249 for (auto MapIt : SccBlocks[SccNum]) {
250 const auto *BB = MapIt.first;
251 if (isSCCHeader(BB, SccNum))
252 for (const auto *Pred : predecessors(BB))
253 if (getSCCNum(Pred) != SccNum)
254 Enters.push_back(const_cast<BasicBlock *>(BB));
255 }
256}
257
259 int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const {
260 for (auto MapIt : SccBlocks[SccNum]) {
261 const auto *BB = MapIt.first;
262 if (isSCCExitingBlock(BB, SccNum))
263 for (const auto *Succ : successors(BB))
264 if (getSCCNum(Succ) != SccNum)
265 Exits.push_back(const_cast<BasicBlock *>(Succ));
266 }
267}
268
269uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB,
270 int SccNum) const {
271 assert(getSCCNum(BB) == SccNum);
272
273 assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC");
274 const auto &SccBlockTypes = SccBlocks[SccNum];
275
276 auto It = SccBlockTypes.find(BB);
277 if (It != SccBlockTypes.end()) {
278 return It->second;
279 }
280 return Inner;
281}
282
283void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB,
284 int SccNum) {
285 assert(getSCCNum(BB) == SccNum);
286 uint32_t BlockType = Inner;
287
288 if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) {
289 // Consider any block that is an entry point to the SCC as
290 // a header.
291 return getSCCNum(Pred) != SccNum;
292 }))
293 BlockType |= Header;
294
295 if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) {
296 return getSCCNum(Succ) != SccNum;
297 }))
298 BlockType |= Exiting;
299
300 // Lazily compute the set of headers for a given SCC and cache the results
301 // in the SccHeaderMap.
302 if (SccBlocks.size() <= static_cast<unsigned>(SccNum))
303 SccBlocks.resize(SccNum + 1);
304 auto &SccBlockTypes = SccBlocks[SccNum];
305
306 if (BlockType != Inner) {
307 bool IsInserted;
308 std::tie(std::ignore, IsInserted) =
309 SccBlockTypes.insert(std::make_pair(BB, BlockType));
310 assert(IsInserted && "Duplicated block in SCC");
311 }
312}
313
314BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB,
315 const LoopInfo &LI,
316 const SccInfo &SccI)
317 : BB(BB) {
318 LD.first = LI.getLoopFor(BB);
319 if (!LD.first) {
320 LD.second = SccI.getSCCNum(BB);
321 }
322}
323
324bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const {
325 const auto &SrcBlock = Edge.first;
326 const auto &DstBlock = Edge.second;
327 return (DstBlock.getLoop() &&
328 !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
329 // Assume that SCCs can't be nested.
330 (DstBlock.getSccNum() != -1 &&
331 SrcBlock.getSccNum() != DstBlock.getSccNum());
332}
333
334bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const {
335 return isLoopEnteringEdge({Edge.second, Edge.first});
336}
337
338bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
339 const LoopEdge &Edge) const {
340 return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
341}
342
343bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const {
344 const auto &SrcBlock = Edge.first;
345 const auto &DstBlock = Edge.second;
346 return SrcBlock.belongsToSameLoop(DstBlock) &&
347 ((DstBlock.getLoop() &&
348 DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
349 (DstBlock.getSccNum() != -1 &&
350 SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
351}
352
353void BranchProbabilityInfo::getLoopEnterBlocks(
354 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const {
355 if (LB.getLoop()) {
356 auto *Header = LB.getLoop()->getHeader();
357 Enters.append(pred_begin(Header), pred_end(Header));
358 } else {
359 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
360 SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
361 }
362}
363
364void BranchProbabilityInfo::getLoopExitBlocks(
365 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const {
366 if (LB.getLoop()) {
367 LB.getLoop()->getExitBlocks(Exits);
368 } else {
369 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
370 SccI->getSccExitBlocks(LB.getSccNum(), Exits);
371 }
372}
373
374// Propagate existing explicit probabilities from either profile data or
375// 'expect' intrinsic processing. Examine metadata against unreachable
376// heuristic. The probability of the edge coming to unreachable block is
377// set to min of metadata and unreachable heuristic.
378bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
379 const Instruction *TI = BB->getTerminator();
380 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
381 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
382 isa<InvokeInst>(TI) || isa<CallBrInst>(TI)))
383 return false;
384
385 MDNode *WeightsNode = getValidBranchWeightMDNode(*TI);
386 if (!WeightsNode)
387 return false;
388
389 // Check that the number of successors is manageable.
390 assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
391
392 // Build up the final weights that will be used in a temporary buffer.
393 // Compute the sum of all weights to later decide whether they need to
394 // be scaled to fit in 32 bits.
395 uint64_t WeightSum = 0;
397 SmallVector<unsigned, 2> UnreachableIdxs;
398 SmallVector<unsigned, 2> ReachableIdxs;
399
400 extractBranchWeights(WeightsNode, Weights);
401 for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
402 WeightSum += Weights[I];
403 const LoopBlock SrcLoopBB = getLoopBlock(BB);
404 const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I));
405 auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
406 if (EstimatedWeight &&
407 *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
408 UnreachableIdxs.push_back(I);
409 else
410 ReachableIdxs.push_back(I);
411 }
412 assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
413
414 // If the sum of weights does not fit in 32 bits, scale every weight down
415 // accordingly.
416 uint64_t ScalingFactor =
417 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
418
419 if (ScalingFactor > 1) {
420 WeightSum = 0;
421 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
422 Weights[I] /= ScalingFactor;
423 WeightSum += Weights[I];
424 }
425 }
426 assert(WeightSum <= UINT32_MAX &&
427 "Expected weights to scale down to 32 bits");
428
429 if (WeightSum == 0 || ReachableIdxs.size() == 0) {
430 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
431 Weights[I] = 1;
432 WeightSum = TI->getNumSuccessors();
433 }
434
435 // Set the probability.
437 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
438 BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) });
439
440 // Examine the metadata against unreachable heuristic.
441 // If the unreachable heuristic is more strong then we use it for this edge.
442 if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
443 setEdgeProbability(BB, BP);
444 return true;
445 }
446
447 auto UnreachableProb = UR_TAKEN_PROB;
448 for (auto I : UnreachableIdxs)
449 if (UnreachableProb < BP[I]) {
450 BP[I] = UnreachableProb;
451 }
452
453 // Sum of all edge probabilities must be 1.0. If we modified the probability
454 // of some edges then we must distribute the introduced difference over the
455 // reachable blocks.
456 //
457 // Proportional distribution: the relation between probabilities of the
458 // reachable edges is kept unchanged. That is for any reachable edges i and j:
459 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
460 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
461 // Where K is independent of i,j.
462 // newBP[i] == oldBP[i] * K
463 // We need to find K.
464 // Make sum of all reachables of the left and right parts:
465 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
466 // Sum of newBP must be equal to 1.0:
467 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
468 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
469 // Where sum_of_unreachable(newBP) is what has been just changed.
470 // Finally:
471 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
472 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
473 BranchProbability NewUnreachableSum = BranchProbability::getZero();
474 for (auto I : UnreachableIdxs)
475 NewUnreachableSum += BP[I];
476
477 BranchProbability NewReachableSum =
478 BranchProbability::getOne() - NewUnreachableSum;
479
481 for (auto I : ReachableIdxs)
482 OldReachableSum += BP[I];
483
484 if (OldReachableSum != NewReachableSum) { // Anything to dsitribute?
485 if (OldReachableSum.isZero()) {
486 // If all oldBP[i] are zeroes then the proportional distribution results
487 // in all zero probabilities and the error stays big. In this case we
488 // evenly spread NewReachableSum over the reachable edges.
489 BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
490 for (auto I : ReachableIdxs)
491 BP[I] = PerEdge;
492 } else {
493 for (auto I : ReachableIdxs) {
494 // We use uint64_t to avoid double rounding error of the following
495 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
496 // The formula is taken from the private constructor
497 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
498 uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
499 BP[I].getNumerator();
500 uint32_t Div = static_cast<uint32_t>(
501 divideNearest(Mul, OldReachableSum.getNumerator()));
502 BP[I] = BranchProbability::getRaw(Div);
503 }
504 }
505 }
506
507 setEdgeProbability(BB, BP);
508
509 return true;
510}
511
512// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
513// between two pointer or pointer and NULL will fail.
514bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
515 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
516 if (!BI || !BI->isConditional())
517 return false;
518
519 Value *Cond = BI->getCondition();
520 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
521 if (!CI || !CI->isEquality())
522 return false;
523
524 Value *LHS = CI->getOperand(0);
525
526 if (!LHS->getType()->isPointerTy())
527 return false;
528
529 assert(CI->getOperand(1)->getType()->isPointerTy());
530
531 auto Search = PointerTable.find(CI->getPredicate());
532 if (Search == PointerTable.end())
533 return false;
534 setEdgeProbability(BB, Search->second);
535 return true;
536}
537
538// Compute the unlikely successors to the block BB in the loop L, specifically
539// those that are unlikely because this is a loop, and add them to the
540// UnlikelyBlocks set.
541static void
543 SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
544 // Sometimes in a loop we have a branch whose condition is made false by
545 // taking it. This is typically something like
546 // int n = 0;
547 // while (...) {
548 // if (++n >= MAX) {
549 // n = 0;
550 // }
551 // }
552 // In this sort of situation taking the branch means that at the very least it
553 // won't be taken again in the next iteration of the loop, so we should
554 // consider it less likely than a typical branch.
555 //
556 // We detect this by looking back through the graph of PHI nodes that sets the
557 // value that the condition depends on, and seeing if we can reach a successor
558 // block which can be determined to make the condition false.
559 //
560 // FIXME: We currently consider unlikely blocks to be half as likely as other
561 // blocks, but if we consider the example above the likelyhood is actually
562 // 1/MAX. We could therefore be more precise in how unlikely we consider
563 // blocks to be, but it would require more careful examination of the form
564 // of the comparison expression.
565 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
566 if (!BI || !BI->isConditional())
567 return;
568
569 // Check if the branch is based on an instruction compared with a constant
570 CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
571 if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
572 !isa<Constant>(CI->getOperand(1)))
573 return;
574
575 // Either the instruction must be a PHI, or a chain of operations involving
576 // constants that ends in a PHI which we can then collapse into a single value
577 // if the PHI value is known.
578 Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
579 PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
580 Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
581 // Collect the instructions until we hit a PHI
583 while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
584 isa<Constant>(CmpLHS->getOperand(1))) {
585 // Stop if the chain extends outside of the loop
586 if (!L->contains(CmpLHS))
587 return;
588 InstChain.push_back(cast<BinaryOperator>(CmpLHS));
589 CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
590 if (CmpLHS)
591 CmpPHI = dyn_cast<PHINode>(CmpLHS);
592 }
593 if (!CmpPHI || !L->contains(CmpPHI))
594 return;
595
596 // Trace the phi node to find all values that come from successors of BB
597 SmallPtrSet<PHINode*, 8> VisitedInsts;
599 WorkList.push_back(CmpPHI);
600 VisitedInsts.insert(CmpPHI);
601 while (!WorkList.empty()) {
602 PHINode *P = WorkList.pop_back_val();
603 for (BasicBlock *B : P->blocks()) {
604 // Skip blocks that aren't part of the loop
605 if (!L->contains(B))
606 continue;
607 Value *V = P->getIncomingValueForBlock(B);
608 // If the source is a PHI add it to the work list if we haven't
609 // already visited it.
610 if (PHINode *PN = dyn_cast<PHINode>(V)) {
611 if (VisitedInsts.insert(PN).second)
612 WorkList.push_back(PN);
613 continue;
614 }
615 // If this incoming value is a constant and B is a successor of BB, then
616 // we can constant-evaluate the compare to see if it makes the branch be
617 // taken or not.
618 Constant *CmpLHSConst = dyn_cast<Constant>(V);
619 if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
620 continue;
621 // First collapse InstChain
622 const DataLayout &DL = BB->getDataLayout();
623 for (Instruction *I : llvm::reverse(InstChain)) {
624 CmpLHSConst = ConstantFoldBinaryOpOperands(
625 I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
626 if (!CmpLHSConst)
627 break;
628 }
629 if (!CmpLHSConst)
630 continue;
631 // Now constant-evaluate the compare
633 CI->getPredicate(), CmpLHSConst, CmpConst, DL);
634 // If the result means we don't branch to the block then that block is
635 // unlikely.
636 if (Result &&
637 ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
638 (Result->isOneValue() && B == BI->getSuccessor(1))))
639 UnlikelyBlocks.insert(B);
640 }
641 }
642}
643
644std::optional<uint32_t>
645BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
646 auto WeightIt = EstimatedBlockWeight.find(BB);
647 if (WeightIt == EstimatedBlockWeight.end())
648 return std::nullopt;
649 return WeightIt->second;
650}
651
652std::optional<uint32_t>
653BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
654 auto WeightIt = EstimatedLoopWeight.find(L);
655 if (WeightIt == EstimatedLoopWeight.end())
656 return std::nullopt;
657 return WeightIt->second;
658}
659
660std::optional<uint32_t>
661BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
662 // For edges entering a loop take weight of a loop rather than an individual
663 // block in the loop.
664 return isLoopEnteringEdge(Edge)
665 ? getEstimatedLoopWeight(Edge.second.getLoopData())
666 : getEstimatedBlockWeight(Edge.second.getBlock());
667}
668
669template <class IterT>
670std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
671 const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
673 std::optional<uint32_t> MaxWeight;
674 for (const BasicBlock *DstBB : Successors) {
675 const LoopBlock DstLoopBB = getLoopBlock(DstBB);
676 auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
677
678 if (!Weight)
679 return std::nullopt;
680
681 if (!MaxWeight || *MaxWeight < *Weight)
682 MaxWeight = Weight;
683 }
684
685 return MaxWeight;
686}
687
688// Updates \p LoopBB's weight and returns true. If \p LoopBB has already
689// an associated weight it is unchanged and false is returned.
690//
691// Please note by the algorithm the weight is not expected to change once set
692// thus 'false' status is used to track visited blocks.
693bool BranchProbabilityInfo::updateEstimatedBlockWeight(
694 LoopBlock &LoopBB, uint32_t BBWeight,
695 SmallVectorImpl<BasicBlock *> &BlockWorkList,
696 SmallVectorImpl<LoopBlock> &LoopWorkList) {
697 BasicBlock *BB = LoopBB.getBlock();
698
699 // In general, weight is assigned to a block when it has final value and
700 // can't/shouldn't be changed. However, there are cases when a block
701 // inherently has several (possibly "contradicting") weights. For example,
702 // "unwind" block may also contain "cold" call. In that case the first
703 // set weight is favored and all consequent weights are ignored.
704 if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
705 return false;
706
707 for (BasicBlock *PredBlock : predecessors(BB)) {
708 LoopBlock PredLoop = getLoopBlock(PredBlock);
709 // Add affected block/loop to a working list.
710 if (isLoopExitingEdge({PredLoop, LoopBB})) {
711 if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
712 LoopWorkList.push_back(PredLoop);
713 } else if (!EstimatedBlockWeight.count(PredBlock))
714 BlockWorkList.push_back(PredBlock);
715 }
716 return true;
717}
718
719// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
720// to all such blocks that are post dominated by \BB. In other words to all
721// blocks that the one is executed if and only if another one is executed.
722// Importantly, we skip loops here for two reasons. First weights of blocks in
723// a loop should be scaled by trip count (yet possibly unknown). Second there is
724// no any value in doing that because that doesn't give any additional
725// information regarding distribution of probabilities inside the loop.
726// Exception is loop 'enter' and 'exit' edges that are handled in a special way
727// at calcEstimatedHeuristics.
728//
729// In addition, \p WorkList is populated with basic blocks if at leas one
730// successor has updated estimated weight.
731void BranchProbabilityInfo::propagateEstimatedBlockWeight(
732 const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
733 uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
734 SmallVectorImpl<LoopBlock> &LoopWorkList) {
735 const BasicBlock *BB = LoopBB.getBlock();
736 const auto *DTStartNode = DT->getNode(BB);
737 const auto *PDTStartNode = PDT->getNode(BB);
738
739 // TODO: Consider propagating weight down the domination line as well.
740 for (const auto *DTNode = DTStartNode; DTNode != nullptr;
741 DTNode = DTNode->getIDom()) {
742 auto *DomBB = DTNode->getBlock();
743 // Consider blocks which lie on one 'line'.
744 if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
745 // If BB doesn't post dominate DomBB it will not post dominate dominators
746 // of DomBB as well.
747 break;
748
749 LoopBlock DomLoopBB = getLoopBlock(DomBB);
750 const LoopEdge Edge{DomLoopBB, LoopBB};
751 // Don't propagate weight to blocks belonging to different loops.
752 if (!isLoopEnteringExitingEdge(Edge)) {
753 if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
754 LoopWorkList))
755 // If DomBB has weight set then all it's predecessors are already
756 // processed (since we propagate weight up to the top of IR each time).
757 break;
758 } else if (isLoopExitingEdge(Edge)) {
759 LoopWorkList.push_back(DomLoopBB);
760 }
761 }
762}
763
764std::optional<uint32_t>
765BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) {
766 // Returns true if \p BB has call marked with "NoReturn" attribute.
767 auto hasNoReturn = [&](const BasicBlock *BB) {
768 for (const auto &I : reverse(*BB))
769 if (const CallInst *CI = dyn_cast<CallInst>(&I))
770 if (CI->hasFnAttr(Attribute::NoReturn))
771 return true;
772
773 return false;
774 };
775
776 // Important note regarding the order of checks. They are ordered by weight
777 // from lowest to highest. Doing that allows to avoid "unstable" results
778 // when several conditions heuristics can be applied simultaneously.
779 if (isa<UnreachableInst>(BB->getTerminator()) ||
780 // If this block is terminated by a call to
781 // @llvm.experimental.deoptimize then treat it like an unreachable
782 // since it is expected to practically never execute.
783 // TODO: Should we actually treat as never returning call?
785 return hasNoReturn(BB)
786 ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
787 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
788
789 // Check if the block is an exception handling block.
790 if (BB->isEHPad())
791 return static_cast<uint32_t>(BlockExecWeight::UNWIND);
792
793 // Check if the block contains 'cold' call.
794 for (const auto &I : *BB)
795 if (const CallInst *CI = dyn_cast<CallInst>(&I))
796 if (CI->hasFnAttr(Attribute::Cold))
797 return static_cast<uint32_t>(BlockExecWeight::COLD);
798
799 return std::nullopt;
800}
801
802// Does RPO traversal over all blocks in \p F and assigns weights to
803// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
804// best to propagate the weight to up/down the IR.
805void BranchProbabilityInfo::computeEestimateBlockWeight(
806 const Function &F, DominatorTree *DT, PostDominatorTree *PDT) {
807 SmallVector<BasicBlock *, 8> BlockWorkList;
808 SmallVector<LoopBlock, 8> LoopWorkList;
810
811 // By doing RPO we make sure that all predecessors already have weights
812 // calculated before visiting theirs successors.
814 for (const auto *BB : RPOT)
815 if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
816 // If we were able to find estimated weight for the block set it to this
817 // block and propagate up the IR.
818 propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight,
819 BlockWorkList, LoopWorkList);
820
821 // BlockWorklist/LoopWorkList contains blocks/loops with at least one
822 // successor/exit having estimated weight. Try to propagate weight to such
823 // blocks/loops from successors/exits.
824 // Process loops and blocks. Order is not important.
825 do {
826 while (!LoopWorkList.empty()) {
827 const LoopBlock LoopBB = LoopWorkList.pop_back_val();
828 const LoopData LD = LoopBB.getLoopData();
829 if (EstimatedLoopWeight.count(LD))
830 continue;
831
832 auto Res = LoopExitBlocks.try_emplace(LD);
833 SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
834 if (Res.second)
835 getLoopExitBlocks(LoopBB, Exits);
836 auto LoopWeight = getMaxEstimatedEdgeWeight(
837 LoopBB, make_range(Exits.begin(), Exits.end()));
838
839 if (LoopWeight) {
840 // If we never exit the loop then we can enter it once at maximum.
841 if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
842 LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
843
844 EstimatedLoopWeight.insert({LD, *LoopWeight});
845 // Add all blocks entering the loop into working list.
846 getLoopEnterBlocks(LoopBB, BlockWorkList);
847 }
848 }
849
850 while (!BlockWorkList.empty()) {
851 // We can reach here only if BlockWorkList is not empty.
852 const BasicBlock *BB = BlockWorkList.pop_back_val();
853 if (EstimatedBlockWeight.count(BB))
854 continue;
855
856 // We take maximum over all weights of successors. In other words we take
857 // weight of "hot" path. In theory we can probably find a better function
858 // which gives higher accuracy results (comparing to "maximum") but I
859 // can't
860 // think of any right now. And I doubt it will make any difference in
861 // practice.
862 const LoopBlock LoopBB = getLoopBlock(BB);
863 auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
864
865 if (MaxWeight)
866 propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
867 BlockWorkList, LoopWorkList);
868 }
869 } while (!BlockWorkList.empty() || !LoopWorkList.empty());
870}
871
872// Calculate edge probabilities based on block's estimated weight.
873// Note that gathered weights were not scaled for loops. Thus edges entering
874// and exiting loops requires special processing.
875bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
877 "expected more than one successor!");
878
879 const LoopBlock LoopBB = getLoopBlock(BB);
880
883 if (LoopBB.getLoop())
884 computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);
885
886 // Changed to 'true' if at least one successor has estimated weight.
887 bool FoundEstimatedWeight = false;
888 SmallVector<uint32_t, 4> SuccWeights;
889 uint64_t TotalWeight = 0;
890 // Go over all successors of BB and put their weights into SuccWeights.
891 for (const BasicBlock *SuccBB : successors(BB)) {
892 std::optional<uint32_t> Weight;
893 const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
894 const LoopEdge Edge{LoopBB, SuccLoopBB};
895
896 Weight = getEstimatedEdgeWeight(Edge);
897
898 if (isLoopExitingEdge(Edge) &&
899 // Avoid adjustment of ZERO weight since it should remain unchanged.
900 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
901 // Scale down loop exiting weight by trip count.
902 Weight = std::max(
903 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
904 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
905 TC);
906 }
907 bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
908 if (IsUnlikelyEdge &&
909 // Avoid adjustment of ZERO weight since it should remain unchanged.
910 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
911 // 'Unlikely' blocks have twice lower weight.
912 Weight = std::max(
913 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
914 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
915 }
916
917 if (Weight)
918 FoundEstimatedWeight = true;
919
920 auto WeightVal =
921 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
922 TotalWeight += WeightVal;
923 SuccWeights.push_back(WeightVal);
924 }
925
926 // If non of blocks have estimated weight bail out.
927 // If TotalWeight is 0 that means weight of each successor is 0 as well and
928 // equally likely. Bail out early to not deal with devision by zero.
929 if (!FoundEstimatedWeight || TotalWeight == 0)
930 return false;
931
932 assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
933 const unsigned SuccCount = SuccWeights.size();
934
935 // If the sum of weights does not fit in 32 bits, scale every weight down
936 // accordingly.
937 if (TotalWeight > UINT32_MAX) {
938 uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
939 TotalWeight = 0;
940 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
941 SuccWeights[Idx] /= ScalingFactor;
942 if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
943 SuccWeights[Idx] =
944 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
945 TotalWeight += SuccWeights[Idx];
946 }
947 assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
948 }
949
950 // Finally set probabilities to edges according to estimated block weights.
951 SmallVector<BranchProbability, 4> EdgeProbabilities(
952 SuccCount, BranchProbability::getUnknown());
953
954 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
955 EdgeProbabilities[Idx] =
956 BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
957 }
958 setEdgeProbability(BB, EdgeProbabilities);
959 return true;
960}
961
962bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
963 const TargetLibraryInfo *TLI) {
964 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
965 if (!BI || !BI->isConditional())
966 return false;
967
968 Value *Cond = BI->getCondition();
969 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
970 if (!CI)
971 return false;
972
973 auto GetConstantInt = [](Value *V) {
974 if (auto *I = dyn_cast<BitCastInst>(V))
975 return dyn_cast<ConstantInt>(I->getOperand(0));
976 return dyn_cast<ConstantInt>(V);
977 };
978
979 Value *RHS = CI->getOperand(1);
980 ConstantInt *CV = GetConstantInt(RHS);
981 if (!CV)
982 return false;
983
984 // If the LHS is the result of AND'ing a value with a single bit bitmask,
985 // we don't have information about probabilities.
986 if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
987 if (LHS->getOpcode() == Instruction::And)
988 if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1)))
989 if (AndRHS->getValue().isPowerOf2())
990 return false;
991
992 // Check if the LHS is the return value of a library function
994 if (TLI)
995 if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
996 if (Function *CalledFn = Call->getCalledFunction())
997 TLI->getLibFunc(*CalledFn, Func);
998
999 ProbabilityTable::const_iterator Search;
1000 if (Func == LibFunc_strcasecmp ||
1001 Func == LibFunc_strcmp ||
1002 Func == LibFunc_strncasecmp ||
1003 Func == LibFunc_strncmp ||
1004 Func == LibFunc_memcmp ||
1005 Func == LibFunc_bcmp) {
1006 Search = ICmpWithLibCallTable.find(CI->getPredicate());
1007 if (Search == ICmpWithLibCallTable.end())
1008 return false;
1009 } else if (CV->isZero()) {
1010 Search = ICmpWithZeroTable.find(CI->getPredicate());
1011 if (Search == ICmpWithZeroTable.end())
1012 return false;
1013 } else if (CV->isOne()) {
1014 Search = ICmpWithOneTable.find(CI->getPredicate());
1015 if (Search == ICmpWithOneTable.end())
1016 return false;
1017 } else if (CV->isMinusOne()) {
1018 Search = ICmpWithMinusOneTable.find(CI->getPredicate());
1019 if (Search == ICmpWithMinusOneTable.end())
1020 return false;
1021 } else {
1022 return false;
1023 }
1024
1025 setEdgeProbability(BB, Search->second);
1026 return true;
1027}
1028
1029bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
1030 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
1031 if (!BI || !BI->isConditional())
1032 return false;
1033
1034 Value *Cond = BI->getCondition();
1035 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
1036 if (!FCmp)
1037 return false;
1038
1039 ProbabilityList ProbList;
1040 if (FCmp->isEquality()) {
1041 ProbList = !FCmp->isTrueWhenEqual() ?
1042 // f1 == f2 -> Unlikely
1044 // f1 != f2 -> Likely
1046 } else {
1047 auto Search = FCmpTable.find(FCmp->getPredicate());
1048 if (Search == FCmpTable.end())
1049 return false;
1050 ProbList = Search->second;
1051 }
1052
1053 setEdgeProbability(BB, ProbList);
1054 return true;
1055}
1056
1058 Probs.clear();
1059 Handles.clear();
1060}
1061
1064 // Check whether the analysis, all analyses on functions, or the function's
1065 // CFG have been preserved.
1066 auto PAC = PA.getChecker<BranchProbabilityAnalysis>();
1067 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
1068 PAC.preservedSet<CFGAnalyses>());
1069}
1070
1072 OS << "---- Branch Probabilities ----\n";
1073 // We print the probabilities from the last function the analysis ran over,
1074 // or the function it is currently running over.
1075 assert(LastF && "Cannot print prior to running over a function");
1076 for (const auto &BI : *LastF) {
1077 for (const BasicBlock *Succ : successors(&BI))
1078 printEdgeProbability(OS << " ", &BI, Succ);
1079 }
1080}
1081
1083isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
1084 // Hot probability is at least 4/5 = 80%
1085 // FIXME: Compare against a static "hot" BranchProbability.
1086 return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
1087}
1088
1089/// Get the raw edge probability for the edge. If can't find it, return a
1090/// default probability 1/N where N is the number of successors. Here an edge is
1091/// specified using PredBlock and an
1092/// index to the successors.
1095 unsigned IndexInSuccessors) const {
1096 auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
1097 assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
1098 (Probs.end() == I) &&
1099 "Probability for I-th successor must always be defined along with the "
1100 "probability for the first successor");
1101
1102 if (I != Probs.end())
1103 return I->second;
1104
1105 return {1, static_cast<uint32_t>(succ_size(Src))};
1106}
1107
1110 const_succ_iterator Dst) const {
1111 return getEdgeProbability(Src, Dst.getSuccessorIndex());
1112}
1113
1114/// Get the raw edge probability calculated for the block pair. This returns the
1115/// sum of all raw edge probabilities from Src to Dst.
1118 const BasicBlock *Dst) const {
1119 if (!Probs.count(std::make_pair(Src, 0)))
1120 return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src));
1121
1122 auto Prob = BranchProbability::getZero();
1123 for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
1124 if (*I == Dst)
1125 Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second;
1126
1127 return Prob;
1128}
1129
1130/// Set the edge probability for all edges at once.
1132 const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
1133 assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
1134 eraseBlock(Src); // Erase stale data if any.
1135 if (Probs.size() == 0)
1136 return; // Nothing to set.
1137
1138 Handles.insert(BasicBlockCallbackVH(Src, this));
1139 uint64_t TotalNumerator = 0;
1140 for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
1141 this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
1142 LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
1143 << " successor probability to " << Probs[SuccIdx]
1144 << "\n");
1145 TotalNumerator += Probs[SuccIdx].getNumerator();
1146 }
1147
1148 // Because of rounding errors the total probability cannot be checked to be
1149 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1150 // Instead, every single probability in Probs must be as accurate as possible.
1151 // This results in error 1/denominator at most, thus the total absolute error
1152 // should be within Probs.size / BranchProbability::getDenominator.
1153 assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
1154 assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
1155 (void)TotalNumerator;
1156}
1157
1159 BasicBlock *Dst) {
1160 eraseBlock(Dst); // Erase stale data if any.
1161 unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
1162 assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
1163 if (NumSuccessors == 0)
1164 return; // Nothing to set.
1165 if (!this->Probs.contains(std::make_pair(Src, 0)))
1166 return; // No probability is set for edges from Src. Keep the same for Dst.
1167
1168 Handles.insert(BasicBlockCallbackVH(Dst, this));
1169 for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
1170 auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
1171 this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
1172 LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx
1173 << " successor probability to " << Prob << "\n");
1174 }
1175}
1176
1178 assert(Src->getTerminator()->getNumSuccessors() == 2);
1179 if (!Probs.contains(std::make_pair(Src, 0)))
1180 return; // No probability is set for edges from Src
1181 assert(Probs.contains(std::make_pair(Src, 1)));
1182 std::swap(Probs[std::make_pair(Src, 0)], Probs[std::make_pair(Src, 1)]);
1183}
1184
1187 const BasicBlock *Src,
1188 const BasicBlock *Dst) const {
1189 const BranchProbability Prob = getEdgeProbability(Src, Dst);
1190 OS << "edge ";
1191 Src->printAsOperand(OS, false, Src->getModule());
1192 OS << " -> ";
1193 Dst->printAsOperand(OS, false, Dst->getModule());
1194 OS << " probability is " << Prob
1195 << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
1196
1197 return OS;
1198}
1199
1201 LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");
1202
1203 // Note that we cannot use successors of BB because the terminator of BB may
1204 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
1205 // Instead we remove prob data for the block by iterating successors by their
1206 // indices from 0 till the last which exists. There could not be prob data for
1207 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
1208 // set for all successors from 0 to M at once by the method
1209 // setEdgeProbability().
1210 Handles.erase(BasicBlockCallbackVH(BB, this));
1211 for (unsigned I = 0;; ++I) {
1212 auto MapI = Probs.find(std::make_pair(BB, I));
1213 if (MapI == Probs.end()) {
1214 assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
1215 "Must be no more successors");
1216 return;
1217 }
1218 Probs.erase(MapI);
1219 }
1220}
1221
1223 const TargetLibraryInfo *TLI,
1224 DominatorTree *DT,
1225 PostDominatorTree *PDT) {
1226 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
1227 << " ----\n\n");
1228 LastF = &F; // Store the last function we ran on for printing.
1229 LI = &LoopI;
1230
1231 SccI = std::make_unique<SccInfo>(F);
1232
1233 assert(EstimatedBlockWeight.empty());
1234 assert(EstimatedLoopWeight.empty());
1235
1236 std::unique_ptr<DominatorTree> DTPtr;
1237 std::unique_ptr<PostDominatorTree> PDTPtr;
1238
1239 if (!DT) {
1240 DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
1241 DT = DTPtr.get();
1242 }
1243
1244 if (!PDT) {
1245 PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
1246 PDT = PDTPtr.get();
1247 }
1248
1249 computeEestimateBlockWeight(F, DT, PDT);
1250
1251 // Walk the basic blocks in post-order so that we can build up state about
1252 // the successors of a block iteratively.
1253 for (const auto *BB : post_order(&F.getEntryBlock())) {
1254 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
1255 << "\n");
1256 // If there is no at least two successors, no sense to set probability.
1257 if (BB->getTerminator()->getNumSuccessors() < 2)
1258 continue;
1259 if (calcMetadataWeights(BB))
1260 continue;
1261 if (calcEstimatedHeuristics(BB))
1262 continue;
1263 if (calcPointerHeuristics(BB))
1264 continue;
1265 if (calcZeroHeuristics(BB, TLI))
1266 continue;
1267 if (calcFloatingPointHeuristics(BB))
1268 continue;
1269 }
1270
1271 EstimatedLoopWeight.clear();
1272 EstimatedBlockWeight.clear();
1273 SccI.reset();
1274
1275 if (PrintBranchProb && (PrintBranchProbFuncName.empty() ||
1276 F.getName() == PrintBranchProbFuncName)) {
1277 print(dbgs());
1278 }
1279}
1280
1282 AnalysisUsage &AU) const {
1283 // We require DT so it's available when LI is available. The LI updating code
1284 // asserts that DT is also present so if we don't make sure that we have DT
1285 // here, that assert will trigger.
1291 AU.setPreservesAll();
1292}
1293
1295 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1296 const TargetLibraryInfo &TLI =
1297 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1298 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1299 PostDominatorTree &PDT =
1300 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1301 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1302 return false;
1303}
1304
1306
1308 const Module *) const {
1309 BPI.print(OS);
1310}
1311
1312AnalysisKey BranchProbabilityAnalysis::Key;
1315 auto &LI = AM.getResult<LoopAnalysis>(F);
1316 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1317 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1318 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1320 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1321 return BPI;
1322}
1323
1326 OS << "Printing analysis 'Branch Probability Analysis' for function '"
1327 << F.getName() << "':\n";
1329 return PreservedAnalyses::all();
1330}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
BlockExecWeight
Set of dedicated "absolute" execution weights for a block.
@ NORETURN
Weight to a block containing non returning call.
@ UNWIND
Weight to 'unwind' block of an invoke instruction.
@ COLD
Weight to a 'cold' block.
@ ZERO
Special weight used for cases with exact zero probability.
@ UNREACHABLE
Weight to an 'unreachable' block.
@ DEFAULT
Default weight is used in cases when there is no dedicated execution weight set.
@ LOWEST_NON_ZERO
Minimal possible non zero weight.
static const uint32_t FPH_TAKEN_WEIGHT
static const BranchProbability FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const uint32_t LBH_TAKEN_WEIGHT
static const uint32_t ZH_NONTAKEN_WEIGHT
static const ProbabilityTable ICmpWithLibCallTable
strcmp and similar functions return zero, negative, or positive, if the first string is equal,...
static const ProbabilityTable ICmpWithMinusOneTable
Integer compares with -1:
branch prob
static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const ProbabilityTable ICmpWithZeroTable
Integer compares with 0:
static const uint32_t PH_NONTAKEN_WEIGHT
std::map< CmpInst::Predicate, ProbabilityList > ProbabilityTable
static const BranchProbability PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const BranchProbability FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const uint32_t PH_TAKEN_WEIGHT
Heuristics and lookup tables for non-loop branches: Pointer Heuristics (PH)
static const BranchProbability FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
branch Branch Probability Analysis
static const BranchProbability PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
SmallVector< BranchProbability > ProbabilityList
static const ProbabilityTable ICmpWithOneTable
Integer compares with 1:
static const uint32_t ZH_TAKEN_WEIGHT
Zero Heuristics (ZH)
static const uint32_t FPH_NONTAKEN_WEIGHT
static const BranchProbability UR_TAKEN_PROB
Unreachable-terminating branch taken probability.
static const ProbabilityTable FCmpTable
Floating-Point compares:
static const uint32_t LBH_NONTAKEN_WEIGHT
static const ProbabilityTable PointerTable
Pointer comparisons:
static const uint32_t FPH_ORD_WEIGHT
This is the probability for an ordered floating point comparison.
static void computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, SmallPtrSetImpl< const BasicBlock * > &UnlikelyBlocks)
static const uint32_t FPH_UNO_WEIGHT
This is the probability for an unordered floating point comparison, it means one or two of the operan...
cl::opt< std::string > PrintBranchProbFuncName("print-bpi-func-name", cl::Hidden, cl::desc("The option to specify the name of the function " "whose branch probability info is printed."))
static cl::opt< bool > PrintBranchProb("print-bpi", cl::init(false), cl::Hidden, cl::desc("Print the branch probability info."))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
Value * RHS
Value * LHS
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:331
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:675
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM)
Run the analysis pass over a function and produce BPI.
Legacy analysis pass which computes BranchProbabilityInfo.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &OS, const Module *M=nullptr) const override
print - Print out the internal state of the pass.
void getSccEnterBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Enters) const
Fills in Enters vector with all such blocks that don't belong to SCC with SccNum ID but there is an e...
void getSccExitBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Exits) const
Fills in Exits vector with all such blocks that don't belong to SCC with SccNum ID but there is an ed...
int getSCCNum(const BasicBlock *BB) const
If BB belongs to some SCC then ID of that SCC is returned, otherwise -1 is returned.
Analysis providing branch probability information.
void eraseBlock(const BasicBlock *BB)
Forget analysis results for the given basic block.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
bool invalidate(Function &, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &)
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void calculate(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI, DominatorTree *DT, PostDominatorTree *PDT)
bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const
Test if an edge is hot relative to other out-edges of the Src.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
void print(raw_ostream &OS) const
raw_ostream & printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const
Print an edge's probability.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static uint32_t getDenominator()
static BranchProbability getRaw(uint32_t N)
static BranchProbability getOne()
static BranchProbability getUnknown()
uint32_t getNumerator() const
static BranchProbability getZero()
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
bool isTrueWhenEqual() const
This is just a convenience.
Definition: InstrTypes.h:940
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:214
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:226
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate Pred)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Metadata node.
Definition: Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:49
BlockType
Used as immediate MachineOperands for block signatures.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto pred_end(const MachineBasicBlock *BB)
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< po_iterator< T > > post_order(const T &G)
void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &)
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition: MathExtras.h:467
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
MDNode * getValidBranchWeightMDNode(const Instruction &I)
Get the valid branch weights metadata node.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto succ_size(const MachineBasicBlock *BB)
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Mul
Product of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28