LLVM 20.0.0git
BranchProbabilityInfo.cpp
Go to the documentation of this file.
1//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loops should be simplified before this analysis.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/STLExtras.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/LLVMContext.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Value.h"
38#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <map>
48#include <utility>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "branch-prob"
53
55 "print-bpi", cl::init(false), cl::Hidden,
56 cl::desc("Print the branch probability info."));
57
59 "print-bpi-func-name", cl::Hidden,
60 cl::desc("The option to specify the name of the function "
61 "whose branch probability info is printed."));
62
64 "Branch Probability Analysis", false, true)
70 "Branch Probability Analysis", false, true)
71
73 : FunctionPass(ID) {
76}
77
79
80// Weights are for internal use only. They are used by heuristics to help to
81// estimate edges' probability. Example:
82//
83// Using "Loop Branch Heuristics" we predict weights of edges for the
84// block BB2.
85// ...
86// |
87// V
88// BB1<-+
89// | |
90// | | (Weight = 124)
91// V |
92// BB2--+
93// |
94// | (Weight = 4)
95// V
96// BB3
97//
98// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
99// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
100static const uint32_t LBH_TAKEN_WEIGHT = 124;
102
103/// Unreachable-terminating branch taken probability.
104///
105/// This is the probability for a branch being taken to a block that terminates
106/// (eventually) in unreachable. These are predicted as unlikely as possible.
107/// All reachable probability will proportionally share the remaining part.
109
110/// Heuristics and lookup tables for non-loop branches:
111/// Pointer Heuristics (PH)
112static const uint32_t PH_TAKEN_WEIGHT = 20;
113static const uint32_t PH_NONTAKEN_WEIGHT = 12;
114static const BranchProbability
116static const BranchProbability
118
120using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;
121
122/// Pointer comparisons:
124 {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
125 {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
126};
127
128/// Zero Heuristics (ZH)
129static const uint32_t ZH_TAKEN_WEIGHT = 20;
130static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
131static const BranchProbability
133static const BranchProbability
135
136/// Integer compares with 0:
138 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
139 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
140 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
141 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
142};
143
144/// Integer compares with -1:
146 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
147 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
148 // InstCombine canonicalizes X >= 0 into X > -1
149 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
150};
151
152/// Integer compares with 1:
154 // InstCombine canonicalizes X <= 0 into X < 1
155 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
156};
157
158/// strcmp and similar functions return zero, negative, or positive, if the
159/// first string is equal, less, or greater than the second. We consider it
160/// likely that the strings are not equal, so a comparison with zero is
161/// probably false, but also a comparison with any other number is also
162/// probably false given that what exactly is returned for nonzero values is
163/// not specified. Any kind of comparison other than equality we know
164/// nothing about.
168};
169
170// Floating-Point Heuristics (FPH)
171static const uint32_t FPH_TAKEN_WEIGHT = 20;
173
174/// This is the probability for an ordered floating point comparison.
175static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
176/// This is the probability for an unordered floating point comparison, it means
177/// one or two of the operands are NaN. Usually it is used to test for an
178/// exceptional case, so the result is unlikely.
179static const uint32_t FPH_UNO_WEIGHT = 1;
180
183static const BranchProbability
185static const BranchProbability
187static const BranchProbability
189
190/// Floating-Point compares:
192 {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
193 {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
194};
195
196/// Set of dedicated "absolute" execution weights for a block. These weights are
197/// meaningful relative to each other and their derivatives only.
198enum class BlockExecWeight : std::uint32_t {
199 /// Special weight used for cases with exact zero probability.
200 ZERO = 0x0,
201 /// Minimal possible non zero weight.
202 LOWEST_NON_ZERO = 0x1,
203 /// Weight to an 'unreachable' block.
205 /// Weight to a block containing non returning call.
207 /// Weight to 'unwind' block of an invoke instruction.
209 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
210 /// with attribute 'cold'.
211 COLD = 0xffff,
212 /// Default weight is used in cases when there is no dedicated execution
213 /// weight set. It is not propagated through the domination line either.
214 DEFAULT = 0xfffff
215};
216
218 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
219 // FIXME: We could only calculate this if the CFG is known to be irreducible
220 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
221 int SccNum = 0;
222 for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
223 ++It, ++SccNum) {
224 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
225 // catch them.
226 const std::vector<const BasicBlock *> &Scc = *It;
227 if (Scc.size() == 1)
228 continue;
229
230 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
231 for (const auto *BB : Scc) {
232 LLVM_DEBUG(dbgs() << " " << BB->getName());
233 SccNums[BB] = SccNum;
234 calculateSccBlockType(BB, SccNum);
235 }
236 LLVM_DEBUG(dbgs() << "\n");
237 }
238}
239
241 auto SccIt = SccNums.find(BB);
242 if (SccIt == SccNums.end())
243 return -1;
244 return SccIt->second;
245}
246
248 int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const {
249
250 for (auto MapIt : SccBlocks[SccNum]) {
251 const auto *BB = MapIt.first;
252 if (isSCCHeader(BB, SccNum))
253 for (const auto *Pred : predecessors(BB))
254 if (getSCCNum(Pred) != SccNum)
255 Enters.push_back(const_cast<BasicBlock *>(BB));
256 }
257}
258
260 int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const {
261 for (auto MapIt : SccBlocks[SccNum]) {
262 const auto *BB = MapIt.first;
263 if (isSCCExitingBlock(BB, SccNum))
264 for (const auto *Succ : successors(BB))
265 if (getSCCNum(Succ) != SccNum)
266 Exits.push_back(const_cast<BasicBlock *>(Succ));
267 }
268}
269
270uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB,
271 int SccNum) const {
272 assert(getSCCNum(BB) == SccNum);
273
274 assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC");
275 const auto &SccBlockTypes = SccBlocks[SccNum];
276
277 auto It = SccBlockTypes.find(BB);
278 if (It != SccBlockTypes.end()) {
279 return It->second;
280 }
281 return Inner;
282}
283
284void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB,
285 int SccNum) {
286 assert(getSCCNum(BB) == SccNum);
287 uint32_t BlockType = Inner;
288
289 if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) {
290 // Consider any block that is an entry point to the SCC as
291 // a header.
292 return getSCCNum(Pred) != SccNum;
293 }))
294 BlockType |= Header;
295
296 if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) {
297 return getSCCNum(Succ) != SccNum;
298 }))
299 BlockType |= Exiting;
300
301 // Lazily compute the set of headers for a given SCC and cache the results
302 // in the SccHeaderMap.
303 if (SccBlocks.size() <= static_cast<unsigned>(SccNum))
304 SccBlocks.resize(SccNum + 1);
305 auto &SccBlockTypes = SccBlocks[SccNum];
306
307 if (BlockType != Inner) {
308 bool IsInserted;
309 std::tie(std::ignore, IsInserted) =
310 SccBlockTypes.insert(std::make_pair(BB, BlockType));
311 assert(IsInserted && "Duplicated block in SCC");
312 }
313}
314
315BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB,
316 const LoopInfo &LI,
317 const SccInfo &SccI)
318 : BB(BB) {
319 LD.first = LI.getLoopFor(BB);
320 if (!LD.first) {
321 LD.second = SccI.getSCCNum(BB);
322 }
323}
324
325bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const {
326 const auto &SrcBlock = Edge.first;
327 const auto &DstBlock = Edge.second;
328 return (DstBlock.getLoop() &&
329 !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
330 // Assume that SCCs can't be nested.
331 (DstBlock.getSccNum() != -1 &&
332 SrcBlock.getSccNum() != DstBlock.getSccNum());
333}
334
335bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const {
336 return isLoopEnteringEdge({Edge.second, Edge.first});
337}
338
339bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
340 const LoopEdge &Edge) const {
341 return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
342}
343
344bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const {
345 const auto &SrcBlock = Edge.first;
346 const auto &DstBlock = Edge.second;
347 return SrcBlock.belongsToSameLoop(DstBlock) &&
348 ((DstBlock.getLoop() &&
349 DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
350 (DstBlock.getSccNum() != -1 &&
351 SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
352}
353
354void BranchProbabilityInfo::getLoopEnterBlocks(
355 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const {
356 if (LB.getLoop()) {
357 auto *Header = LB.getLoop()->getHeader();
358 Enters.append(pred_begin(Header), pred_end(Header));
359 } else {
360 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
361 SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
362 }
363}
364
365void BranchProbabilityInfo::getLoopExitBlocks(
366 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const {
367 if (LB.getLoop()) {
368 LB.getLoop()->getExitBlocks(Exits);
369 } else {
370 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
371 SccI->getSccExitBlocks(LB.getSccNum(), Exits);
372 }
373}
374
375// Propagate existing explicit probabilities from either profile data or
376// 'expect' intrinsic processing. Examine metadata against unreachable
377// heuristic. The probability of the edge coming to unreachable block is
378// set to min of metadata and unreachable heuristic.
379bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
380 const Instruction *TI = BB->getTerminator();
381 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
382 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
383 isa<InvokeInst>(TI) || isa<CallBrInst>(TI)))
384 return false;
385
386 MDNode *WeightsNode = getValidBranchWeightMDNode(*TI);
387 if (!WeightsNode)
388 return false;
389
390 // Check that the number of successors is manageable.
391 assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
392
393 // Build up the final weights that will be used in a temporary buffer.
394 // Compute the sum of all weights to later decide whether they need to
395 // be scaled to fit in 32 bits.
396 uint64_t WeightSum = 0;
398 SmallVector<unsigned, 2> UnreachableIdxs;
399 SmallVector<unsigned, 2> ReachableIdxs;
400
401 extractBranchWeights(WeightsNode, Weights);
402 for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
403 WeightSum += Weights[I];
404 const LoopBlock SrcLoopBB = getLoopBlock(BB);
405 const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I));
406 auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
407 if (EstimatedWeight &&
408 *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
409 UnreachableIdxs.push_back(I);
410 else
411 ReachableIdxs.push_back(I);
412 }
413 assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
414
415 // If the sum of weights does not fit in 32 bits, scale every weight down
416 // accordingly.
417 uint64_t ScalingFactor =
418 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
419
420 if (ScalingFactor > 1) {
421 WeightSum = 0;
422 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
423 Weights[I] /= ScalingFactor;
424 WeightSum += Weights[I];
425 }
426 }
427 assert(WeightSum <= UINT32_MAX &&
428 "Expected weights to scale down to 32 bits");
429
430 if (WeightSum == 0 || ReachableIdxs.size() == 0) {
431 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
432 Weights[I] = 1;
433 WeightSum = TI->getNumSuccessors();
434 }
435
436 // Set the probability.
438 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
439 BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) });
440
441 // Examine the metadata against unreachable heuristic.
442 // If the unreachable heuristic is more strong then we use it for this edge.
443 if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
444 setEdgeProbability(BB, BP);
445 return true;
446 }
447
448 auto UnreachableProb = UR_TAKEN_PROB;
449 for (auto I : UnreachableIdxs)
450 if (UnreachableProb < BP[I]) {
451 BP[I] = UnreachableProb;
452 }
453
454 // Sum of all edge probabilities must be 1.0. If we modified the probability
455 // of some edges then we must distribute the introduced difference over the
456 // reachable blocks.
457 //
458 // Proportional distribution: the relation between probabilities of the
459 // reachable edges is kept unchanged. That is for any reachable edges i and j:
460 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
461 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
462 // Where K is independent of i,j.
463 // newBP[i] == oldBP[i] * K
464 // We need to find K.
465 // Make sum of all reachables of the left and right parts:
466 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
467 // Sum of newBP must be equal to 1.0:
468 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
469 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
470 // Where sum_of_unreachable(newBP) is what has been just changed.
471 // Finally:
472 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
473 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
474 BranchProbability NewUnreachableSum = BranchProbability::getZero();
475 for (auto I : UnreachableIdxs)
476 NewUnreachableSum += BP[I];
477
478 BranchProbability NewReachableSum =
479 BranchProbability::getOne() - NewUnreachableSum;
480
482 for (auto I : ReachableIdxs)
483 OldReachableSum += BP[I];
484
485 if (OldReachableSum != NewReachableSum) { // Anything to dsitribute?
486 if (OldReachableSum.isZero()) {
487 // If all oldBP[i] are zeroes then the proportional distribution results
488 // in all zero probabilities and the error stays big. In this case we
489 // evenly spread NewReachableSum over the reachable edges.
490 BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
491 for (auto I : ReachableIdxs)
492 BP[I] = PerEdge;
493 } else {
494 for (auto I : ReachableIdxs) {
495 // We use uint64_t to avoid double rounding error of the following
496 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
497 // The formula is taken from the private constructor
498 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
499 uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
500 BP[I].getNumerator();
501 uint32_t Div = static_cast<uint32_t>(
502 divideNearest(Mul, OldReachableSum.getNumerator()));
503 BP[I] = BranchProbability::getRaw(Div);
504 }
505 }
506 }
507
508 setEdgeProbability(BB, BP);
509
510 return true;
511}
512
513// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
514// between two pointer or pointer and NULL will fail.
515bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
516 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
517 if (!BI || !BI->isConditional())
518 return false;
519
520 Value *Cond = BI->getCondition();
521 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
522 if (!CI || !CI->isEquality())
523 return false;
524
525 Value *LHS = CI->getOperand(0);
526
527 if (!LHS->getType()->isPointerTy())
528 return false;
529
530 assert(CI->getOperand(1)->getType()->isPointerTy());
531
532 auto Search = PointerTable.find(CI->getPredicate());
533 if (Search == PointerTable.end())
534 return false;
535 setEdgeProbability(BB, Search->second);
536 return true;
537}
538
539// Compute the unlikely successors to the block BB in the loop L, specifically
540// those that are unlikely because this is a loop, and add them to the
541// UnlikelyBlocks set.
542static void
544 SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
545 // Sometimes in a loop we have a branch whose condition is made false by
546 // taking it. This is typically something like
547 // int n = 0;
548 // while (...) {
549 // if (++n >= MAX) {
550 // n = 0;
551 // }
552 // }
553 // In this sort of situation taking the branch means that at the very least it
554 // won't be taken again in the next iteration of the loop, so we should
555 // consider it less likely than a typical branch.
556 //
557 // We detect this by looking back through the graph of PHI nodes that sets the
558 // value that the condition depends on, and seeing if we can reach a successor
559 // block which can be determined to make the condition false.
560 //
561 // FIXME: We currently consider unlikely blocks to be half as likely as other
562 // blocks, but if we consider the example above the likelyhood is actually
563 // 1/MAX. We could therefore be more precise in how unlikely we consider
564 // blocks to be, but it would require more careful examination of the form
565 // of the comparison expression.
566 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
567 if (!BI || !BI->isConditional())
568 return;
569
570 // Check if the branch is based on an instruction compared with a constant
571 CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
572 if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
573 !isa<Constant>(CI->getOperand(1)))
574 return;
575
576 // Either the instruction must be a PHI, or a chain of operations involving
577 // constants that ends in a PHI which we can then collapse into a single value
578 // if the PHI value is known.
579 Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
580 PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
581 Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
582 // Collect the instructions until we hit a PHI
584 while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
585 isa<Constant>(CmpLHS->getOperand(1))) {
586 // Stop if the chain extends outside of the loop
587 if (!L->contains(CmpLHS))
588 return;
589 InstChain.push_back(cast<BinaryOperator>(CmpLHS));
590 CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
591 if (CmpLHS)
592 CmpPHI = dyn_cast<PHINode>(CmpLHS);
593 }
594 if (!CmpPHI || !L->contains(CmpPHI))
595 return;
596
597 // Trace the phi node to find all values that come from successors of BB
598 SmallPtrSet<PHINode*, 8> VisitedInsts;
600 WorkList.push_back(CmpPHI);
601 VisitedInsts.insert(CmpPHI);
602 while (!WorkList.empty()) {
603 PHINode *P = WorkList.pop_back_val();
604 for (BasicBlock *B : P->blocks()) {
605 // Skip blocks that aren't part of the loop
606 if (!L->contains(B))
607 continue;
608 Value *V = P->getIncomingValueForBlock(B);
609 // If the source is a PHI add it to the work list if we haven't
610 // already visited it.
611 if (PHINode *PN = dyn_cast<PHINode>(V)) {
612 if (VisitedInsts.insert(PN).second)
613 WorkList.push_back(PN);
614 continue;
615 }
616 // If this incoming value is a constant and B is a successor of BB, then
617 // we can constant-evaluate the compare to see if it makes the branch be
618 // taken or not.
619 Constant *CmpLHSConst = dyn_cast<Constant>(V);
620 if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
621 continue;
622 // First collapse InstChain
623 const DataLayout &DL = BB->getDataLayout();
624 for (Instruction *I : llvm::reverse(InstChain)) {
625 CmpLHSConst = ConstantFoldBinaryOpOperands(
626 I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
627 if (!CmpLHSConst)
628 break;
629 }
630 if (!CmpLHSConst)
631 continue;
632 // Now constant-evaluate the compare
634 CI->getPredicate(), CmpLHSConst, CmpConst, DL);
635 // If the result means we don't branch to the block then that block is
636 // unlikely.
637 if (Result &&
638 ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
639 (Result->isOneValue() && B == BI->getSuccessor(1))))
640 UnlikelyBlocks.insert(B);
641 }
642 }
643}
644
645std::optional<uint32_t>
646BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
647 auto WeightIt = EstimatedBlockWeight.find(BB);
648 if (WeightIt == EstimatedBlockWeight.end())
649 return std::nullopt;
650 return WeightIt->second;
651}
652
653std::optional<uint32_t>
654BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
655 auto WeightIt = EstimatedLoopWeight.find(L);
656 if (WeightIt == EstimatedLoopWeight.end())
657 return std::nullopt;
658 return WeightIt->second;
659}
660
661std::optional<uint32_t>
662BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
663 // For edges entering a loop take weight of a loop rather than an individual
664 // block in the loop.
665 return isLoopEnteringEdge(Edge)
666 ? getEstimatedLoopWeight(Edge.second.getLoopData())
667 : getEstimatedBlockWeight(Edge.second.getBlock());
668}
669
670template <class IterT>
671std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
672 const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
674 std::optional<uint32_t> MaxWeight;
675 for (const BasicBlock *DstBB : Successors) {
676 const LoopBlock DstLoopBB = getLoopBlock(DstBB);
677 auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
678
679 if (!Weight)
680 return std::nullopt;
681
682 if (!MaxWeight || *MaxWeight < *Weight)
683 MaxWeight = Weight;
684 }
685
686 return MaxWeight;
687}
688
689// Updates \p LoopBB's weight and returns true. If \p LoopBB has already
690// an associated weight it is unchanged and false is returned.
691//
692// Please note by the algorithm the weight is not expected to change once set
693// thus 'false' status is used to track visited blocks.
694bool BranchProbabilityInfo::updateEstimatedBlockWeight(
695 LoopBlock &LoopBB, uint32_t BBWeight,
696 SmallVectorImpl<BasicBlock *> &BlockWorkList,
697 SmallVectorImpl<LoopBlock> &LoopWorkList) {
698 BasicBlock *BB = LoopBB.getBlock();
699
700 // In general, weight is assigned to a block when it has final value and
701 // can't/shouldn't be changed. However, there are cases when a block
702 // inherently has several (possibly "contradicting") weights. For example,
703 // "unwind" block may also contain "cold" call. In that case the first
704 // set weight is favored and all consequent weights are ignored.
705 if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
706 return false;
707
708 for (BasicBlock *PredBlock : predecessors(BB)) {
709 LoopBlock PredLoop = getLoopBlock(PredBlock);
710 // Add affected block/loop to a working list.
711 if (isLoopExitingEdge({PredLoop, LoopBB})) {
712 if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
713 LoopWorkList.push_back(PredLoop);
714 } else if (!EstimatedBlockWeight.count(PredBlock))
715 BlockWorkList.push_back(PredBlock);
716 }
717 return true;
718}
719
720// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
721// to all such blocks that are post dominated by \BB. In other words to all
722// blocks that the one is executed if and only if another one is executed.
723// Importantly, we skip loops here for two reasons. First weights of blocks in
724// a loop should be scaled by trip count (yet possibly unknown). Second there is
725// no any value in doing that because that doesn't give any additional
726// information regarding distribution of probabilities inside the loop.
727// Exception is loop 'enter' and 'exit' edges that are handled in a special way
728// at calcEstimatedHeuristics.
729//
730// In addition, \p WorkList is populated with basic blocks if at leas one
731// successor has updated estimated weight.
732void BranchProbabilityInfo::propagateEstimatedBlockWeight(
733 const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
734 uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
735 SmallVectorImpl<LoopBlock> &LoopWorkList) {
736 const BasicBlock *BB = LoopBB.getBlock();
737 const auto *DTStartNode = DT->getNode(BB);
738 const auto *PDTStartNode = PDT->getNode(BB);
739
740 // TODO: Consider propagating weight down the domination line as well.
741 for (const auto *DTNode = DTStartNode; DTNode != nullptr;
742 DTNode = DTNode->getIDom()) {
743 auto *DomBB = DTNode->getBlock();
744 // Consider blocks which lie on one 'line'.
745 if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
746 // If BB doesn't post dominate DomBB it will not post dominate dominators
747 // of DomBB as well.
748 break;
749
750 LoopBlock DomLoopBB = getLoopBlock(DomBB);
751 const LoopEdge Edge{DomLoopBB, LoopBB};
752 // Don't propagate weight to blocks belonging to different loops.
753 if (!isLoopEnteringExitingEdge(Edge)) {
754 if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
755 LoopWorkList))
756 // If DomBB has weight set then all it's predecessors are already
757 // processed (since we propagate weight up to the top of IR each time).
758 break;
759 } else if (isLoopExitingEdge(Edge)) {
760 LoopWorkList.push_back(DomLoopBB);
761 }
762 }
763}
764
765std::optional<uint32_t>
766BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) {
767 // Returns true if \p BB has call marked with "NoReturn" attribute.
768 auto hasNoReturn = [&](const BasicBlock *BB) {
769 for (const auto &I : reverse(*BB))
770 if (const CallInst *CI = dyn_cast<CallInst>(&I))
771 if (CI->hasFnAttr(Attribute::NoReturn))
772 return true;
773
774 return false;
775 };
776
777 // Important note regarding the order of checks. They are ordered by weight
778 // from lowest to highest. Doing that allows to avoid "unstable" results
779 // when several conditions heuristics can be applied simultaneously.
780 if (isa<UnreachableInst>(BB->getTerminator()) ||
781 // If this block is terminated by a call to
782 // @llvm.experimental.deoptimize then treat it like an unreachable
783 // since it is expected to practically never execute.
784 // TODO: Should we actually treat as never returning call?
786 return hasNoReturn(BB)
787 ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
788 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
789
790 // Check if the block is an exception handling block.
791 if (BB->isEHPad())
792 return static_cast<uint32_t>(BlockExecWeight::UNWIND);
793
794 // Check if the block contains 'cold' call.
795 for (const auto &I : *BB)
796 if (const CallInst *CI = dyn_cast<CallInst>(&I))
797 if (CI->hasFnAttr(Attribute::Cold))
798 return static_cast<uint32_t>(BlockExecWeight::COLD);
799
800 return std::nullopt;
801}
802
803// Does RPO traversal over all blocks in \p F and assigns weights to
804// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
805// best to propagate the weight to up/down the IR.
806void BranchProbabilityInfo::computeEestimateBlockWeight(
807 const Function &F, DominatorTree *DT, PostDominatorTree *PDT) {
808 SmallVector<BasicBlock *, 8> BlockWorkList;
809 SmallVector<LoopBlock, 8> LoopWorkList;
811
812 // By doing RPO we make sure that all predecessors already have weights
813 // calculated before visiting theirs successors.
815 for (const auto *BB : RPOT)
816 if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
817 // If we were able to find estimated weight for the block set it to this
818 // block and propagate up the IR.
819 propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight,
820 BlockWorkList, LoopWorkList);
821
822 // BlockWorklist/LoopWorkList contains blocks/loops with at least one
823 // successor/exit having estimated weight. Try to propagate weight to such
824 // blocks/loops from successors/exits.
825 // Process loops and blocks. Order is not important.
826 do {
827 while (!LoopWorkList.empty()) {
828 const LoopBlock LoopBB = LoopWorkList.pop_back_val();
829 const LoopData LD = LoopBB.getLoopData();
830 if (EstimatedLoopWeight.count(LD))
831 continue;
832
833 auto Res = LoopExitBlocks.try_emplace(LD);
834 SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
835 if (Res.second)
836 getLoopExitBlocks(LoopBB, Exits);
837 auto LoopWeight = getMaxEstimatedEdgeWeight(
838 LoopBB, make_range(Exits.begin(), Exits.end()));
839
840 if (LoopWeight) {
841 // If we never exit the loop then we can enter it once at maximum.
842 if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
843 LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
844
845 EstimatedLoopWeight.insert({LD, *LoopWeight});
846 // Add all blocks entering the loop into working list.
847 getLoopEnterBlocks(LoopBB, BlockWorkList);
848 }
849 }
850
851 while (!BlockWorkList.empty()) {
852 // We can reach here only if BlockWorkList is not empty.
853 const BasicBlock *BB = BlockWorkList.pop_back_val();
854 if (EstimatedBlockWeight.count(BB))
855 continue;
856
857 // We take maximum over all weights of successors. In other words we take
858 // weight of "hot" path. In theory we can probably find a better function
859 // which gives higher accuracy results (comparing to "maximum") but I
860 // can't
861 // think of any right now. And I doubt it will make any difference in
862 // practice.
863 const LoopBlock LoopBB = getLoopBlock(BB);
864 auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
865
866 if (MaxWeight)
867 propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
868 BlockWorkList, LoopWorkList);
869 }
870 } while (!BlockWorkList.empty() || !LoopWorkList.empty());
871}
872
873// Calculate edge probabilities based on block's estimated weight.
874// Note that gathered weights were not scaled for loops. Thus edges entering
875// and exiting loops requires special processing.
876bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
878 "expected more than one successor!");
879
880 const LoopBlock LoopBB = getLoopBlock(BB);
881
884 if (LoopBB.getLoop())
885 computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);
886
887 // Changed to 'true' if at least one successor has estimated weight.
888 bool FoundEstimatedWeight = false;
889 SmallVector<uint32_t, 4> SuccWeights;
890 uint64_t TotalWeight = 0;
891 // Go over all successors of BB and put their weights into SuccWeights.
892 for (const BasicBlock *SuccBB : successors(BB)) {
893 std::optional<uint32_t> Weight;
894 const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
895 const LoopEdge Edge{LoopBB, SuccLoopBB};
896
897 Weight = getEstimatedEdgeWeight(Edge);
898
899 if (isLoopExitingEdge(Edge) &&
900 // Avoid adjustment of ZERO weight since it should remain unchanged.
901 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
902 // Scale down loop exiting weight by trip count.
903 Weight = std::max(
904 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
905 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
906 TC);
907 }
908 bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
909 if (IsUnlikelyEdge &&
910 // Avoid adjustment of ZERO weight since it should remain unchanged.
911 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
912 // 'Unlikely' blocks have twice lower weight.
913 Weight = std::max(
914 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
915 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
916 }
917
918 if (Weight)
919 FoundEstimatedWeight = true;
920
921 auto WeightVal =
922 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
923 TotalWeight += WeightVal;
924 SuccWeights.push_back(WeightVal);
925 }
926
927 // If non of blocks have estimated weight bail out.
928 // If TotalWeight is 0 that means weight of each successor is 0 as well and
929 // equally likely. Bail out early to not deal with devision by zero.
930 if (!FoundEstimatedWeight || TotalWeight == 0)
931 return false;
932
933 assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
934 const unsigned SuccCount = SuccWeights.size();
935
936 // If the sum of weights does not fit in 32 bits, scale every weight down
937 // accordingly.
938 if (TotalWeight > UINT32_MAX) {
939 uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
940 TotalWeight = 0;
941 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
942 SuccWeights[Idx] /= ScalingFactor;
943 if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
944 SuccWeights[Idx] =
945 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
946 TotalWeight += SuccWeights[Idx];
947 }
948 assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
949 }
950
951 // Finally set probabilities to edges according to estimated block weights.
952 SmallVector<BranchProbability, 4> EdgeProbabilities(
953 SuccCount, BranchProbability::getUnknown());
954
955 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
956 EdgeProbabilities[Idx] =
957 BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
958 }
959 setEdgeProbability(BB, EdgeProbabilities);
960 return true;
961}
962
963bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
964 const TargetLibraryInfo *TLI) {
965 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
966 if (!BI || !BI->isConditional())
967 return false;
968
969 Value *Cond = BI->getCondition();
970 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
971 if (!CI)
972 return false;
973
974 auto GetConstantInt = [](Value *V) {
975 if (auto *I = dyn_cast<BitCastInst>(V))
976 return dyn_cast<ConstantInt>(I->getOperand(0));
977 return dyn_cast<ConstantInt>(V);
978 };
979
980 Value *RHS = CI->getOperand(1);
981 ConstantInt *CV = GetConstantInt(RHS);
982 if (!CV)
983 return false;
984
985 // If the LHS is the result of AND'ing a value with a single bit bitmask,
986 // we don't have information about probabilities.
987 if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
988 if (LHS->getOpcode() == Instruction::And)
989 if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1)))
990 if (AndRHS->getValue().isPowerOf2())
991 return false;
992
993 // Check if the LHS is the return value of a library function
995 if (TLI)
996 if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
997 if (Function *CalledFn = Call->getCalledFunction())
998 TLI->getLibFunc(*CalledFn, Func);
999
1000 ProbabilityTable::const_iterator Search;
1001 if (Func == LibFunc_strcasecmp ||
1002 Func == LibFunc_strcmp ||
1003 Func == LibFunc_strncasecmp ||
1004 Func == LibFunc_strncmp ||
1005 Func == LibFunc_memcmp ||
1006 Func == LibFunc_bcmp) {
1007 Search = ICmpWithLibCallTable.find(CI->getPredicate());
1008 if (Search == ICmpWithLibCallTable.end())
1009 return false;
1010 } else if (CV->isZero()) {
1011 Search = ICmpWithZeroTable.find(CI->getPredicate());
1012 if (Search == ICmpWithZeroTable.end())
1013 return false;
1014 } else if (CV->isOne()) {
1015 Search = ICmpWithOneTable.find(CI->getPredicate());
1016 if (Search == ICmpWithOneTable.end())
1017 return false;
1018 } else if (CV->isMinusOne()) {
1019 Search = ICmpWithMinusOneTable.find(CI->getPredicate());
1020 if (Search == ICmpWithMinusOneTable.end())
1021 return false;
1022 } else {
1023 return false;
1024 }
1025
1026 setEdgeProbability(BB, Search->second);
1027 return true;
1028}
1029
1030bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
1031 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
1032 if (!BI || !BI->isConditional())
1033 return false;
1034
1035 Value *Cond = BI->getCondition();
1036 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
1037 if (!FCmp)
1038 return false;
1039
1040 ProbabilityList ProbList;
1041 if (FCmp->isEquality()) {
1042 ProbList = !FCmp->isTrueWhenEqual() ?
1043 // f1 == f2 -> Unlikely
1045 // f1 != f2 -> Likely
1047 } else {
1048 auto Search = FCmpTable.find(FCmp->getPredicate());
1049 if (Search == FCmpTable.end())
1050 return false;
1051 ProbList = Search->second;
1052 }
1053
1054 setEdgeProbability(BB, ProbList);
1055 return true;
1056}
1057
1059 Probs.clear();
1060 Handles.clear();
1061}
1062
1065 // Check whether the analysis, all analyses on functions, or the function's
1066 // CFG have been preserved.
1067 auto PAC = PA.getChecker<BranchProbabilityAnalysis>();
1068 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
1069 PAC.preservedSet<CFGAnalyses>());
1070}
1071
1073 OS << "---- Branch Probabilities ----\n";
1074 // We print the probabilities from the last function the analysis ran over,
1075 // or the function it is currently running over.
1076 assert(LastF && "Cannot print prior to running over a function");
1077 for (const auto &BI : *LastF) {
1078 for (const BasicBlock *Succ : successors(&BI))
1079 printEdgeProbability(OS << " ", &BI, Succ);
1080 }
1081}
1082
1084isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
1085 // Hot probability is at least 4/5 = 80%
1086 // FIXME: Compare against a static "hot" BranchProbability.
1087 return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
1088}
1089
1090/// Get the raw edge probability for the edge. If can't find it, return a
1091/// default probability 1/N where N is the number of successors. Here an edge is
1092/// specified using PredBlock and an
1093/// index to the successors.
1096 unsigned IndexInSuccessors) const {
1097 auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
1098 assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
1099 (Probs.end() == I) &&
1100 "Probability for I-th successor must always be defined along with the "
1101 "probability for the first successor");
1102
1103 if (I != Probs.end())
1104 return I->second;
1105
1106 return {1, static_cast<uint32_t>(succ_size(Src))};
1107}
1108
1111 const_succ_iterator Dst) const {
1112 return getEdgeProbability(Src, Dst.getSuccessorIndex());
1113}
1114
1115/// Get the raw edge probability calculated for the block pair. This returns the
1116/// sum of all raw edge probabilities from Src to Dst.
1119 const BasicBlock *Dst) const {
1120 if (!Probs.count(std::make_pair(Src, 0)))
1121 return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src));
1122
1123 auto Prob = BranchProbability::getZero();
1124 for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
1125 if (*I == Dst)
1126 Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second;
1127
1128 return Prob;
1129}
1130
1131/// Set the edge probability for all edges at once.
1133 const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
1134 assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
1135 eraseBlock(Src); // Erase stale data if any.
1136 if (Probs.size() == 0)
1137 return; // Nothing to set.
1138
1139 Handles.insert(BasicBlockCallbackVH(Src, this));
1140 uint64_t TotalNumerator = 0;
1141 for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
1142 this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
1143 LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
1144 << " successor probability to " << Probs[SuccIdx]
1145 << "\n");
1146 TotalNumerator += Probs[SuccIdx].getNumerator();
1147 }
1148
1149 // Because of rounding errors the total probability cannot be checked to be
1150 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1151 // Instead, every single probability in Probs must be as accurate as possible.
1152 // This results in error 1/denominator at most, thus the total absolute error
1153 // should be within Probs.size / BranchProbability::getDenominator.
1154 assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
1155 assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
1156 (void)TotalNumerator;
1157}
1158
1160 BasicBlock *Dst) {
1161 eraseBlock(Dst); // Erase stale data if any.
1162 unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
1163 assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
1164 if (NumSuccessors == 0)
1165 return; // Nothing to set.
1166 if (!this->Probs.contains(std::make_pair(Src, 0)))
1167 return; // No probability is set for edges from Src. Keep the same for Dst.
1168
1169 Handles.insert(BasicBlockCallbackVH(Dst, this));
1170 for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
1171 auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
1172 this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
1173 LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx
1174 << " successor probability to " << Prob << "\n");
1175 }
1176}
1177
1179 assert(Src->getTerminator()->getNumSuccessors() == 2);
1180 if (!Probs.contains(std::make_pair(Src, 0)))
1181 return; // No probability is set for edges from Src
1182 assert(Probs.contains(std::make_pair(Src, 1)));
1183 std::swap(Probs[std::make_pair(Src, 0)], Probs[std::make_pair(Src, 1)]);
1184}
1185
1188 const BasicBlock *Src,
1189 const BasicBlock *Dst) const {
1190 const BranchProbability Prob = getEdgeProbability(Src, Dst);
1191 OS << "edge ";
1192 Src->printAsOperand(OS, false, Src->getModule());
1193 OS << " -> ";
1194 Dst->printAsOperand(OS, false, Dst->getModule());
1195 OS << " probability is " << Prob
1196 << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
1197
1198 return OS;
1199}
1200
1202 LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");
1203
1204 // Note that we cannot use successors of BB because the terminator of BB may
1205 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
1206 // Instead we remove prob data for the block by iterating successors by their
1207 // indices from 0 till the last which exists. There could not be prob data for
1208 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
1209 // set for all successors from 0 to M at once by the method
1210 // setEdgeProbability().
1211 Handles.erase(BasicBlockCallbackVH(BB, this));
1212 for (unsigned I = 0;; ++I) {
1213 auto MapI = Probs.find(std::make_pair(BB, I));
1214 if (MapI == Probs.end()) {
1215 assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
1216 "Must be no more successors");
1217 return;
1218 }
1219 Probs.erase(MapI);
1220 }
1221}
1222
1224 const TargetLibraryInfo *TLI,
1225 DominatorTree *DT,
1226 PostDominatorTree *PDT) {
1227 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
1228 << " ----\n\n");
1229 LastF = &F; // Store the last function we ran on for printing.
1230 LI = &LoopI;
1231
1232 SccI = std::make_unique<SccInfo>(F);
1233
1234 assert(EstimatedBlockWeight.empty());
1235 assert(EstimatedLoopWeight.empty());
1236
1237 std::unique_ptr<DominatorTree> DTPtr;
1238 std::unique_ptr<PostDominatorTree> PDTPtr;
1239
1240 if (!DT) {
1241 DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
1242 DT = DTPtr.get();
1243 }
1244
1245 if (!PDT) {
1246 PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
1247 PDT = PDTPtr.get();
1248 }
1249
1250 computeEestimateBlockWeight(F, DT, PDT);
1251
1252 // Walk the basic blocks in post-order so that we can build up state about
1253 // the successors of a block iteratively.
1254 for (const auto *BB : post_order(&F.getEntryBlock())) {
1255 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
1256 << "\n");
1257 // If there is no at least two successors, no sense to set probability.
1258 if (BB->getTerminator()->getNumSuccessors() < 2)
1259 continue;
1260 if (calcMetadataWeights(BB))
1261 continue;
1262 if (calcEstimatedHeuristics(BB))
1263 continue;
1264 if (calcPointerHeuristics(BB))
1265 continue;
1266 if (calcZeroHeuristics(BB, TLI))
1267 continue;
1268 if (calcFloatingPointHeuristics(BB))
1269 continue;
1270 }
1271
1272 EstimatedLoopWeight.clear();
1273 EstimatedBlockWeight.clear();
1274 SccI.reset();
1275
1276 if (PrintBranchProb && (PrintBranchProbFuncName.empty() ||
1277 F.getName() == PrintBranchProbFuncName)) {
1278 print(dbgs());
1279 }
1280}
1281
1283 AnalysisUsage &AU) const {
1284 // We require DT so it's available when LI is available. The LI updating code
1285 // asserts that DT is also present so if we don't make sure that we have DT
1286 // here, that assert will trigger.
1292 AU.setPreservesAll();
1293}
1294
1296 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1297 const TargetLibraryInfo &TLI =
1298 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1299 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1300 PostDominatorTree &PDT =
1301 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1302 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1303 return false;
1304}
1305
1307
1309 const Module *) const {
1310 BPI.print(OS);
1311}
1312
1313AnalysisKey BranchProbabilityAnalysis::Key;
1316 auto &LI = AM.getResult<LoopAnalysis>(F);
1317 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1318 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1319 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1321 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1322 return BPI;
1323}
1324
1327 OS << "Printing analysis 'Branch Probability Analysis' for function '"
1328 << F.getName() << "':\n";
1330 return PreservedAnalyses::all();
1331}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
BlockExecWeight
Set of dedicated "absolute" execution weights for a block.
@ NORETURN
Weight to a block containing non returning call.
@ UNWIND
Weight to 'unwind' block of an invoke instruction.
@ COLD
Weight to a 'cold' block.
@ ZERO
Special weight used for cases with exact zero probability.
@ UNREACHABLE
Weight to an 'unreachable' block.
@ DEFAULT
Default weight is used in cases when there is no dedicated execution weight set.
@ LOWEST_NON_ZERO
Minimal possible non zero weight.
static const uint32_t FPH_TAKEN_WEIGHT
static const BranchProbability FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const uint32_t LBH_TAKEN_WEIGHT
static const uint32_t ZH_NONTAKEN_WEIGHT
static const ProbabilityTable ICmpWithLibCallTable
strcmp and similar functions return zero, negative, or positive, if the first string is equal,...
static const ProbabilityTable ICmpWithMinusOneTable
Integer compares with -1:
branch prob
static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const ProbabilityTable ICmpWithZeroTable
Integer compares with 0:
static const uint32_t PH_NONTAKEN_WEIGHT
std::map< CmpInst::Predicate, ProbabilityList > ProbabilityTable
static const BranchProbability PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const BranchProbability FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const uint32_t PH_TAKEN_WEIGHT
Heuristics and lookup tables for non-loop branches: Pointer Heuristics (PH)
static const BranchProbability FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
branch Branch Probability Analysis
static const BranchProbability PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
SmallVector< BranchProbability > ProbabilityList
static const ProbabilityTable ICmpWithOneTable
Integer compares with 1:
static const uint32_t ZH_TAKEN_WEIGHT
Zero Heuristics (ZH)
static const uint32_t FPH_NONTAKEN_WEIGHT
static const BranchProbability UR_TAKEN_PROB
Unreachable-terminating branch taken probability.
static const ProbabilityTable FCmpTable
Floating-Point compares:
static const uint32_t LBH_NONTAKEN_WEIGHT
static const ProbabilityTable PointerTable
Pointer comparisons:
static const uint32_t FPH_ORD_WEIGHT
This is the probability for an ordered floating point comparison.
static void computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, SmallPtrSetImpl< const BasicBlock * > &UnlikelyBlocks)
static const uint32_t FPH_UNO_WEIGHT
This is the probability for an unordered floating point comparison, it means one or two of the operan...
cl::opt< std::string > PrintBranchProbFuncName("print-bpi-func-name", cl::Hidden, cl::desc("The option to specify the name of the function " "whose branch probability info is printed."))
static cl::opt< bool > PrintBranchProb("print-bpi", cl::init(false), cl::Hidden, cl::desc("Print the branch probability info."))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
const MachineOperand & RHS
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
Value * LHS
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:331
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:675
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM)
Run the analysis pass over a function and produce BPI.
Legacy analysis pass which computes BranchProbabilityInfo.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &OS, const Module *M=nullptr) const override
print - Print out the internal state of the pass.
void getSccEnterBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Enters) const
Fills in Enters vector with all such blocks that don't belong to SCC with SccNum ID but there is an e...
void getSccExitBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Exits) const
Fills in Exits vector with all such blocks that don't belong to SCC with SccNum ID but there is an ed...
int getSCCNum(const BasicBlock *BB) const
If BB belongs to some SCC then ID of that SCC is returned, otherwise -1 is returned.
Analysis providing branch probability information.
void eraseBlock(const BasicBlock *BB)
Forget analysis results for the given basic block.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
bool invalidate(Function &, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &)
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void calculate(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI, DominatorTree *DT, PostDominatorTree *PDT)
bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const
Test if an edge is hot relative to other out-edges of the Src.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
void print(raw_ostream &OS) const
raw_ostream & printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const
Print an edge's probability.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static uint32_t getDenominator()
static BranchProbability getRaw(uint32_t N)
static BranchProbability getOne()
static BranchProbability getUnknown()
uint32_t getNumerator() const
static BranchProbability getZero()
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
bool isTrueWhenEqual() const
This is just a convenience.
Definition: InstrTypes.h:1056
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:226
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate Pred)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Metadata node.
Definition: Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:441
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:49
BlockType
Used as immediate MachineOperands for block signatures.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< po_iterator< T > > post_order(const T &G)
void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &)
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition: MathExtras.h:467
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
MDNode * getValidBranchWeightMDNode(const Instruction &I)
Get the valid branch weights metadata node.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Mul
Product of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1921
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
unsigned succ_size(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28