LLVM 19.0.0git
BranchProbabilityInfo.cpp
Go to the documentation of this file.
1//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loops should be simplified before this analysis.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/STLExtras.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/LLVMContext.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Value.h"
38#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <map>
48#include <utility>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "branch-prob"
53
55 "print-bpi", cl::init(false), cl::Hidden,
56 cl::desc("Print the branch probability info."));
57
59 "print-bpi-func-name", cl::Hidden,
60 cl::desc("The option to specify the name of the function "
61 "whose branch probability info is printed."));
62
64 "Branch Probability Analysis", false, true)
70 "Branch Probability Analysis", false, true)
71
73 : FunctionPass(ID) {
76}
77
79
80// Weights are for internal use only. They are used by heuristics to help to
81// estimate edges' probability. Example:
82//
83// Using "Loop Branch Heuristics" we predict weights of edges for the
84// block BB2.
85// ...
86// |
87// V
88// BB1<-+
89// | |
90// | | (Weight = 124)
91// V |
92// BB2--+
93// |
94// | (Weight = 4)
95// V
96// BB3
97//
98// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
99// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
100static const uint32_t LBH_TAKEN_WEIGHT = 124;
102
103/// Unreachable-terminating branch taken probability.
104///
105/// This is the probability for a branch being taken to a block that terminates
106/// (eventually) in unreachable. These are predicted as unlikely as possible.
107/// All reachable probability will proportionally share the remaining part.
109
110/// Heuristics and lookup tables for non-loop branches:
111/// Pointer Heuristics (PH)
112static const uint32_t PH_TAKEN_WEIGHT = 20;
113static const uint32_t PH_NONTAKEN_WEIGHT = 12;
114static const BranchProbability
116static const BranchProbability
118
120using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;
121
122/// Pointer comparisons:
124 {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
125 {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
126};
127
128/// Zero Heuristics (ZH)
129static const uint32_t ZH_TAKEN_WEIGHT = 20;
130static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
131static const BranchProbability
133static const BranchProbability
135
136/// Integer compares with 0:
138 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
139 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
140 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
141 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
142};
143
144/// Integer compares with -1:
146 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
147 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
148 // InstCombine canonicalizes X >= 0 into X > -1
149 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
150};
151
152/// Integer compares with 1:
154 // InstCombine canonicalizes X <= 0 into X < 1
155 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
156};
157
158/// strcmp and similar functions return zero, negative, or positive, if the
159/// first string is equal, less, or greater than the second. We consider it
160/// likely that the strings are not equal, so a comparison with zero is
161/// probably false, but also a comparison with any other number is also
162/// probably false given that what exactly is returned for nonzero values is
163/// not specified. Any kind of comparison other than equality we know
164/// nothing about.
168};
169
170// Floating-Point Heuristics (FPH)
171static const uint32_t FPH_TAKEN_WEIGHT = 20;
173
174/// This is the probability for an ordered floating point comparison.
175static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
176/// This is the probability for an unordered floating point comparison, it means
177/// one or two of the operands are NaN. Usually it is used to test for an
178/// exceptional case, so the result is unlikely.
179static const uint32_t FPH_UNO_WEIGHT = 1;
180
183static const BranchProbability
185static const BranchProbability
187static const BranchProbability
189
190/// Floating-Point compares:
192 {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
193 {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
194};
195
196/// Set of dedicated "absolute" execution weights for a block. These weights are
197/// meaningful relative to each other and their derivatives only.
198enum class BlockExecWeight : std::uint32_t {
199 /// Special weight used for cases with exact zero probability.
200 ZERO = 0x0,
201 /// Minimal possible non zero weight.
202 LOWEST_NON_ZERO = 0x1,
203 /// Weight to an 'unreachable' block.
205 /// Weight to a block containing non returning call.
207 /// Weight to 'unwind' block of an invoke instruction.
209 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
210 /// with attribute 'cold'.
211 COLD = 0xffff,
212 /// Default weight is used in cases when there is no dedicated execution
213 /// weight set. It is not propagated through the domination line either.
214 DEFAULT = 0xfffff
215};
216
218 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
219 // FIXME: We could only calculate this if the CFG is known to be irreducible
220 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
221 int SccNum = 0;
222 for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
223 ++It, ++SccNum) {
224 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
225 // catch them.
226 const std::vector<const BasicBlock *> &Scc = *It;
227 if (Scc.size() == 1)
228 continue;
229
230 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
231 for (const auto *BB : Scc) {
232 LLVM_DEBUG(dbgs() << " " << BB->getName());
233 SccNums[BB] = SccNum;
234 calculateSccBlockType(BB, SccNum);
235 }
236 LLVM_DEBUG(dbgs() << "\n");
237 }
238}
239
241 auto SccIt = SccNums.find(BB);
242 if (SccIt == SccNums.end())
243 return -1;
244 return SccIt->second;
245}
246
248 int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const {
249
250 for (auto MapIt : SccBlocks[SccNum]) {
251 const auto *BB = MapIt.first;
252 if (isSCCHeader(BB, SccNum))
253 for (const auto *Pred : predecessors(BB))
254 if (getSCCNum(Pred) != SccNum)
255 Enters.push_back(const_cast<BasicBlock *>(BB));
256 }
257}
258
260 int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const {
261 for (auto MapIt : SccBlocks[SccNum]) {
262 const auto *BB = MapIt.first;
263 if (isSCCExitingBlock(BB, SccNum))
264 for (const auto *Succ : successors(BB))
265 if (getSCCNum(Succ) != SccNum)
266 Exits.push_back(const_cast<BasicBlock *>(Succ));
267 }
268}
269
270uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB,
271 int SccNum) const {
272 assert(getSCCNum(BB) == SccNum);
273
274 assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC");
275 const auto &SccBlockTypes = SccBlocks[SccNum];
276
277 auto It = SccBlockTypes.find(BB);
278 if (It != SccBlockTypes.end()) {
279 return It->second;
280 }
281 return Inner;
282}
283
284void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB,
285 int SccNum) {
286 assert(getSCCNum(BB) == SccNum);
287 uint32_t BlockType = Inner;
288
289 if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) {
290 // Consider any block that is an entry point to the SCC as
291 // a header.
292 return getSCCNum(Pred) != SccNum;
293 }))
294 BlockType |= Header;
295
296 if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) {
297 return getSCCNum(Succ) != SccNum;
298 }))
299 BlockType |= Exiting;
300
301 // Lazily compute the set of headers for a given SCC and cache the results
302 // in the SccHeaderMap.
303 if (SccBlocks.size() <= static_cast<unsigned>(SccNum))
304 SccBlocks.resize(SccNum + 1);
305 auto &SccBlockTypes = SccBlocks[SccNum];
306
307 if (BlockType != Inner) {
308 bool IsInserted;
309 std::tie(std::ignore, IsInserted) =
310 SccBlockTypes.insert(std::make_pair(BB, BlockType));
311 assert(IsInserted && "Duplicated block in SCC");
312 }
313}
314
315BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB,
316 const LoopInfo &LI,
317 const SccInfo &SccI)
318 : BB(BB) {
319 LD.first = LI.getLoopFor(BB);
320 if (!LD.first) {
321 LD.second = SccI.getSCCNum(BB);
322 }
323}
324
325bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const {
326 const auto &SrcBlock = Edge.first;
327 const auto &DstBlock = Edge.second;
328 return (DstBlock.getLoop() &&
329 !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
330 // Assume that SCCs can't be nested.
331 (DstBlock.getSccNum() != -1 &&
332 SrcBlock.getSccNum() != DstBlock.getSccNum());
333}
334
335bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const {
336 return isLoopEnteringEdge({Edge.second, Edge.first});
337}
338
339bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
340 const LoopEdge &Edge) const {
341 return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
342}
343
344bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const {
345 const auto &SrcBlock = Edge.first;
346 const auto &DstBlock = Edge.second;
347 return SrcBlock.belongsToSameLoop(DstBlock) &&
348 ((DstBlock.getLoop() &&
349 DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
350 (DstBlock.getSccNum() != -1 &&
351 SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
352}
353
354void BranchProbabilityInfo::getLoopEnterBlocks(
355 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const {
356 if (LB.getLoop()) {
357 auto *Header = LB.getLoop()->getHeader();
358 Enters.append(pred_begin(Header), pred_end(Header));
359 } else {
360 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
361 SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
362 }
363}
364
365void BranchProbabilityInfo::getLoopExitBlocks(
366 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const {
367 if (LB.getLoop()) {
368 LB.getLoop()->getExitBlocks(Exits);
369 } else {
370 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
371 SccI->getSccExitBlocks(LB.getSccNum(), Exits);
372 }
373}
374
375// Propagate existing explicit probabilities from either profile data or
376// 'expect' intrinsic processing. Examine metadata against unreachable
377// heuristic. The probability of the edge coming to unreachable block is
378// set to min of metadata and unreachable heuristic.
379bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
380 const Instruction *TI = BB->getTerminator();
381 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
382 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
383 isa<InvokeInst>(TI) || isa<CallBrInst>(TI)))
384 return false;
385
386 MDNode *WeightsNode = getValidBranchWeightMDNode(*TI);
387 if (!WeightsNode)
388 return false;
389
390 // Check that the number of successors is manageable.
391 assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
392
393 // Build up the final weights that will be used in a temporary buffer.
394 // Compute the sum of all weights to later decide whether they need to
395 // be scaled to fit in 32 bits.
396 uint64_t WeightSum = 0;
398 SmallVector<unsigned, 2> UnreachableIdxs;
399 SmallVector<unsigned, 2> ReachableIdxs;
400
401 extractBranchWeights(WeightsNode, Weights);
402 for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
403 WeightSum += Weights[I];
404 const LoopBlock SrcLoopBB = getLoopBlock(BB);
405 const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I));
406 auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
407 if (EstimatedWeight &&
408 *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
409 UnreachableIdxs.push_back(I);
410 else
411 ReachableIdxs.push_back(I);
412 }
413 assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
414
415 // If the sum of weights does not fit in 32 bits, scale every weight down
416 // accordingly.
417 uint64_t ScalingFactor =
418 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
419
420 if (ScalingFactor > 1) {
421 WeightSum = 0;
422 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
423 Weights[I] /= ScalingFactor;
424 WeightSum += Weights[I];
425 }
426 }
427 assert(WeightSum <= UINT32_MAX &&
428 "Expected weights to scale down to 32 bits");
429
430 if (WeightSum == 0 || ReachableIdxs.size() == 0) {
431 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
432 Weights[I] = 1;
433 WeightSum = TI->getNumSuccessors();
434 }
435
436 // Set the probability.
438 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
439 BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) });
440
441 // Examine the metadata against unreachable heuristic.
442 // If the unreachable heuristic is more strong then we use it for this edge.
443 if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
444 setEdgeProbability(BB, BP);
445 return true;
446 }
447
448 auto UnreachableProb = UR_TAKEN_PROB;
449 for (auto I : UnreachableIdxs)
450 if (UnreachableProb < BP[I]) {
451 BP[I] = UnreachableProb;
452 }
453
454 // Sum of all edge probabilities must be 1.0. If we modified the probability
455 // of some edges then we must distribute the introduced difference over the
456 // reachable blocks.
457 //
458 // Proportional distribution: the relation between probabilities of the
459 // reachable edges is kept unchanged. That is for any reachable edges i and j:
460 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
461 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
462 // Where K is independent of i,j.
463 // newBP[i] == oldBP[i] * K
464 // We need to find K.
465 // Make sum of all reachables of the left and right parts:
466 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
467 // Sum of newBP must be equal to 1.0:
468 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
469 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
470 // Where sum_of_unreachable(newBP) is what has been just changed.
471 // Finally:
472 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
473 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
474 BranchProbability NewUnreachableSum = BranchProbability::getZero();
475 for (auto I : UnreachableIdxs)
476 NewUnreachableSum += BP[I];
477
478 BranchProbability NewReachableSum =
479 BranchProbability::getOne() - NewUnreachableSum;
480
482 for (auto I : ReachableIdxs)
483 OldReachableSum += BP[I];
484
485 if (OldReachableSum != NewReachableSum) { // Anything to dsitribute?
486 if (OldReachableSum.isZero()) {
487 // If all oldBP[i] are zeroes then the proportional distribution results
488 // in all zero probabilities and the error stays big. In this case we
489 // evenly spread NewReachableSum over the reachable edges.
490 BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
491 for (auto I : ReachableIdxs)
492 BP[I] = PerEdge;
493 } else {
494 for (auto I : ReachableIdxs) {
495 // We use uint64_t to avoid double rounding error of the following
496 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
497 // The formula is taken from the private constructor
498 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
499 uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
500 BP[I].getNumerator();
501 uint32_t Div = static_cast<uint32_t>(
502 divideNearest(Mul, OldReachableSum.getNumerator()));
503 BP[I] = BranchProbability::getRaw(Div);
504 }
505 }
506 }
507
508 setEdgeProbability(BB, BP);
509
510 return true;
511}
512
513// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
514// between two pointer or pointer and NULL will fail.
515bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
516 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
517 if (!BI || !BI->isConditional())
518 return false;
519
520 Value *Cond = BI->getCondition();
521 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
522 if (!CI || !CI->isEquality())
523 return false;
524
525 Value *LHS = CI->getOperand(0);
526
527 if (!LHS->getType()->isPointerTy())
528 return false;
529
530 assert(CI->getOperand(1)->getType()->isPointerTy());
531
532 auto Search = PointerTable.find(CI->getPredicate());
533 if (Search == PointerTable.end())
534 return false;
535 setEdgeProbability(BB, Search->second);
536 return true;
537}
538
539// Compute the unlikely successors to the block BB in the loop L, specifically
540// those that are unlikely because this is a loop, and add them to the
541// UnlikelyBlocks set.
542static void
544 SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
545 // Sometimes in a loop we have a branch whose condition is made false by
546 // taking it. This is typically something like
547 // int n = 0;
548 // while (...) {
549 // if (++n >= MAX) {
550 // n = 0;
551 // }
552 // }
553 // In this sort of situation taking the branch means that at the very least it
554 // won't be taken again in the next iteration of the loop, so we should
555 // consider it less likely than a typical branch.
556 //
557 // We detect this by looking back through the graph of PHI nodes that sets the
558 // value that the condition depends on, and seeing if we can reach a successor
559 // block which can be determined to make the condition false.
560 //
561 // FIXME: We currently consider unlikely blocks to be half as likely as other
562 // blocks, but if we consider the example above the likelyhood is actually
563 // 1/MAX. We could therefore be more precise in how unlikely we consider
564 // blocks to be, but it would require more careful examination of the form
565 // of the comparison expression.
566 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
567 if (!BI || !BI->isConditional())
568 return;
569
570 // Check if the branch is based on an instruction compared with a constant
571 CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
572 if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
573 !isa<Constant>(CI->getOperand(1)))
574 return;
575
576 // Either the instruction must be a PHI, or a chain of operations involving
577 // constants that ends in a PHI which we can then collapse into a single value
578 // if the PHI value is known.
579 Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
580 PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
581 Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
582 // Collect the instructions until we hit a PHI
584 while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
585 isa<Constant>(CmpLHS->getOperand(1))) {
586 // Stop if the chain extends outside of the loop
587 if (!L->contains(CmpLHS))
588 return;
589 InstChain.push_back(cast<BinaryOperator>(CmpLHS));
590 CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
591 if (CmpLHS)
592 CmpPHI = dyn_cast<PHINode>(CmpLHS);
593 }
594 if (!CmpPHI || !L->contains(CmpPHI))
595 return;
596
597 // Trace the phi node to find all values that come from successors of BB
598 SmallPtrSet<PHINode*, 8> VisitedInsts;
600 WorkList.push_back(CmpPHI);
601 VisitedInsts.insert(CmpPHI);
602 while (!WorkList.empty()) {
603 PHINode *P = WorkList.pop_back_val();
604 for (BasicBlock *B : P->blocks()) {
605 // Skip blocks that aren't part of the loop
606 if (!L->contains(B))
607 continue;
608 Value *V = P->getIncomingValueForBlock(B);
609 // If the source is a PHI add it to the work list if we haven't
610 // already visited it.
611 if (PHINode *PN = dyn_cast<PHINode>(V)) {
612 if (VisitedInsts.insert(PN).second)
613 WorkList.push_back(PN);
614 continue;
615 }
616 // If this incoming value is a constant and B is a successor of BB, then
617 // we can constant-evaluate the compare to see if it makes the branch be
618 // taken or not.
619 Constant *CmpLHSConst = dyn_cast<Constant>(V);
620 if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
621 continue;
622 // First collapse InstChain
623 const DataLayout &DL = BB->getModule()->getDataLayout();
624 for (Instruction *I : llvm::reverse(InstChain)) {
625 CmpLHSConst = ConstantFoldBinaryOpOperands(
626 I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
627 if (!CmpLHSConst)
628 break;
629 }
630 if (!CmpLHSConst)
631 continue;
632 // Now constant-evaluate the compare
634 CI->getPredicate(), CmpLHSConst, CmpConst, DL);
635 // If the result means we don't branch to the block then that block is
636 // unlikely.
637 if (Result &&
638 ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
639 (Result->isOneValue() && B == BI->getSuccessor(1))))
640 UnlikelyBlocks.insert(B);
641 }
642 }
643}
644
645std::optional<uint32_t>
646BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
647 auto WeightIt = EstimatedBlockWeight.find(BB);
648 if (WeightIt == EstimatedBlockWeight.end())
649 return std::nullopt;
650 return WeightIt->second;
651}
652
653std::optional<uint32_t>
654BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
655 auto WeightIt = EstimatedLoopWeight.find(L);
656 if (WeightIt == EstimatedLoopWeight.end())
657 return std::nullopt;
658 return WeightIt->second;
659}
660
661std::optional<uint32_t>
662BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
663 // For edges entering a loop take weight of a loop rather than an individual
664 // block in the loop.
665 return isLoopEnteringEdge(Edge)
666 ? getEstimatedLoopWeight(Edge.second.getLoopData())
667 : getEstimatedBlockWeight(Edge.second.getBlock());
668}
669
670template <class IterT>
671std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
672 const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
674 std::optional<uint32_t> MaxWeight;
675 for (const BasicBlock *DstBB : Successors) {
676 const LoopBlock DstLoopBB = getLoopBlock(DstBB);
677 auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
678
679 if (!Weight)
680 return std::nullopt;
681
682 if (!MaxWeight || *MaxWeight < *Weight)
683 MaxWeight = Weight;
684 }
685
686 return MaxWeight;
687}
688
689// Updates \p LoopBB's weight and returns true. If \p LoopBB has already
690// an associated weight it is unchanged and false is returned.
691//
692// Please note by the algorithm the weight is not expected to change once set
693// thus 'false' status is used to track visited blocks.
694bool BranchProbabilityInfo::updateEstimatedBlockWeight(
695 LoopBlock &LoopBB, uint32_t BBWeight,
696 SmallVectorImpl<BasicBlock *> &BlockWorkList,
697 SmallVectorImpl<LoopBlock> &LoopWorkList) {
698 BasicBlock *BB = LoopBB.getBlock();
699
700 // In general, weight is assigned to a block when it has final value and
701 // can't/shouldn't be changed. However, there are cases when a block
702 // inherently has several (possibly "contradicting") weights. For example,
703 // "unwind" block may also contain "cold" call. In that case the first
704 // set weight is favored and all consequent weights are ignored.
705 if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
706 return false;
707
708 for (BasicBlock *PredBlock : predecessors(BB)) {
709 LoopBlock PredLoop = getLoopBlock(PredBlock);
710 // Add affected block/loop to a working list.
711 if (isLoopExitingEdge({PredLoop, LoopBB})) {
712 if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
713 LoopWorkList.push_back(PredLoop);
714 } else if (!EstimatedBlockWeight.count(PredBlock))
715 BlockWorkList.push_back(PredBlock);
716 }
717 return true;
718}
719
720// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
721// to all such blocks that are post dominated by \BB. In other words to all
722// blocks that the one is executed if and only if another one is executed.
723// Importantly, we skip loops here for two reasons. First weights of blocks in
724// a loop should be scaled by trip count (yet possibly unknown). Second there is
725// no any value in doing that because that doesn't give any additional
726// information regarding distribution of probabilities inside the loop.
727// Exception is loop 'enter' and 'exit' edges that are handled in a special way
728// at calcEstimatedHeuristics.
729//
730// In addition, \p WorkList is populated with basic blocks if at leas one
731// successor has updated estimated weight.
732void BranchProbabilityInfo::propagateEstimatedBlockWeight(
733 const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
734 uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
735 SmallVectorImpl<LoopBlock> &LoopWorkList) {
736 const BasicBlock *BB = LoopBB.getBlock();
737 const auto *DTStartNode = DT->getNode(BB);
738 const auto *PDTStartNode = PDT->getNode(BB);
739
740 // TODO: Consider propagating weight down the domination line as well.
741 for (const auto *DTNode = DTStartNode; DTNode != nullptr;
742 DTNode = DTNode->getIDom()) {
743 auto *DomBB = DTNode->getBlock();
744 // Consider blocks which lie on one 'line'.
745 if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
746 // If BB doesn't post dominate DomBB it will not post dominate dominators
747 // of DomBB as well.
748 break;
749
750 LoopBlock DomLoopBB = getLoopBlock(DomBB);
751 const LoopEdge Edge{DomLoopBB, LoopBB};
752 // Don't propagate weight to blocks belonging to different loops.
753 if (!isLoopEnteringExitingEdge(Edge)) {
754 if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
755 LoopWorkList))
756 // If DomBB has weight set then all it's predecessors are already
757 // processed (since we propagate weight up to the top of IR each time).
758 break;
759 } else if (isLoopExitingEdge(Edge)) {
760 LoopWorkList.push_back(DomLoopBB);
761 }
762 }
763}
764
765std::optional<uint32_t>
766BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) {
767 // Returns true if \p BB has call marked with "NoReturn" attribute.
768 auto hasNoReturn = [&](const BasicBlock *BB) {
769 for (const auto &I : reverse(*BB))
770 if (const CallInst *CI = dyn_cast<CallInst>(&I))
771 if (CI->hasFnAttr(Attribute::NoReturn))
772 return true;
773
774 return false;
775 };
776
777 // Important note regarding the order of checks. They are ordered by weight
778 // from lowest to highest. Doing that allows to avoid "unstable" results
779 // when several conditions heuristics can be applied simultaneously.
780 if (isa<UnreachableInst>(BB->getTerminator()) ||
781 // If this block is terminated by a call to
782 // @llvm.experimental.deoptimize then treat it like an unreachable
783 // since it is expected to practically never execute.
784 // TODO: Should we actually treat as never returning call?
786 return hasNoReturn(BB)
787 ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
788 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
789
790 // Check if the block is 'unwind' handler of some invoke instruction.
791 for (const auto *Pred : predecessors(BB))
792 if (Pred)
793 if (const auto *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
794 if (II->getUnwindDest() == BB)
795 return static_cast<uint32_t>(BlockExecWeight::UNWIND);
796
797 // Check if the block contains 'cold' call.
798 for (const auto &I : *BB)
799 if (const CallInst *CI = dyn_cast<CallInst>(&I))
800 if (CI->hasFnAttr(Attribute::Cold))
801 return static_cast<uint32_t>(BlockExecWeight::COLD);
802
803 return std::nullopt;
804}
805
806// Does RPO traversal over all blocks in \p F and assigns weights to
807// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
808// best to propagate the weight to up/down the IR.
809void BranchProbabilityInfo::computeEestimateBlockWeight(
810 const Function &F, DominatorTree *DT, PostDominatorTree *PDT) {
811 SmallVector<BasicBlock *, 8> BlockWorkList;
812 SmallVector<LoopBlock, 8> LoopWorkList;
814
815 // By doing RPO we make sure that all predecessors already have weights
816 // calculated before visiting theirs successors.
818 for (const auto *BB : RPOT)
819 if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
820 // If we were able to find estimated weight for the block set it to this
821 // block and propagate up the IR.
822 propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight,
823 BlockWorkList, LoopWorkList);
824
825 // BlockWorklist/LoopWorkList contains blocks/loops with at least one
826 // successor/exit having estimated weight. Try to propagate weight to such
827 // blocks/loops from successors/exits.
828 // Process loops and blocks. Order is not important.
829 do {
830 while (!LoopWorkList.empty()) {
831 const LoopBlock LoopBB = LoopWorkList.pop_back_val();
832 const LoopData LD = LoopBB.getLoopData();
833 if (EstimatedLoopWeight.count(LD))
834 continue;
835
836 auto Res = LoopExitBlocks.try_emplace(LD);
837 SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
838 if (Res.second)
839 getLoopExitBlocks(LoopBB, Exits);
840 auto LoopWeight = getMaxEstimatedEdgeWeight(
841 LoopBB, make_range(Exits.begin(), Exits.end()));
842
843 if (LoopWeight) {
844 // If we never exit the loop then we can enter it once at maximum.
845 if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
846 LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
847
848 EstimatedLoopWeight.insert({LD, *LoopWeight});
849 // Add all blocks entering the loop into working list.
850 getLoopEnterBlocks(LoopBB, BlockWorkList);
851 }
852 }
853
854 while (!BlockWorkList.empty()) {
855 // We can reach here only if BlockWorkList is not empty.
856 const BasicBlock *BB = BlockWorkList.pop_back_val();
857 if (EstimatedBlockWeight.count(BB))
858 continue;
859
860 // We take maximum over all weights of successors. In other words we take
861 // weight of "hot" path. In theory we can probably find a better function
862 // which gives higher accuracy results (comparing to "maximum") but I
863 // can't
864 // think of any right now. And I doubt it will make any difference in
865 // practice.
866 const LoopBlock LoopBB = getLoopBlock(BB);
867 auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
868
869 if (MaxWeight)
870 propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
871 BlockWorkList, LoopWorkList);
872 }
873 } while (!BlockWorkList.empty() || !LoopWorkList.empty());
874}
875
876// Calculate edge probabilities based on block's estimated weight.
877// Note that gathered weights were not scaled for loops. Thus edges entering
878// and exiting loops requires special processing.
879bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
881 "expected more than one successor!");
882
883 const LoopBlock LoopBB = getLoopBlock(BB);
884
887 if (LoopBB.getLoop())
888 computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);
889
890 // Changed to 'true' if at least one successor has estimated weight.
891 bool FoundEstimatedWeight = false;
892 SmallVector<uint32_t, 4> SuccWeights;
893 uint64_t TotalWeight = 0;
894 // Go over all successors of BB and put their weights into SuccWeights.
895 for (const BasicBlock *SuccBB : successors(BB)) {
896 std::optional<uint32_t> Weight;
897 const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
898 const LoopEdge Edge{LoopBB, SuccLoopBB};
899
900 Weight = getEstimatedEdgeWeight(Edge);
901
902 if (isLoopExitingEdge(Edge) &&
903 // Avoid adjustment of ZERO weight since it should remain unchanged.
904 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
905 // Scale down loop exiting weight by trip count.
906 Weight = std::max(
907 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
908 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
909 TC);
910 }
911 bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
912 if (IsUnlikelyEdge &&
913 // Avoid adjustment of ZERO weight since it should remain unchanged.
914 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
915 // 'Unlikely' blocks have twice lower weight.
916 Weight = std::max(
917 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
918 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
919 }
920
921 if (Weight)
922 FoundEstimatedWeight = true;
923
924 auto WeightVal =
925 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
926 TotalWeight += WeightVal;
927 SuccWeights.push_back(WeightVal);
928 }
929
930 // If non of blocks have estimated weight bail out.
931 // If TotalWeight is 0 that means weight of each successor is 0 as well and
932 // equally likely. Bail out early to not deal with devision by zero.
933 if (!FoundEstimatedWeight || TotalWeight == 0)
934 return false;
935
936 assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
937 const unsigned SuccCount = SuccWeights.size();
938
939 // If the sum of weights does not fit in 32 bits, scale every weight down
940 // accordingly.
941 if (TotalWeight > UINT32_MAX) {
942 uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
943 TotalWeight = 0;
944 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
945 SuccWeights[Idx] /= ScalingFactor;
946 if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
947 SuccWeights[Idx] =
948 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
949 TotalWeight += SuccWeights[Idx];
950 }
951 assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
952 }
953
954 // Finally set probabilities to edges according to estimated block weights.
955 SmallVector<BranchProbability, 4> EdgeProbabilities(
956 SuccCount, BranchProbability::getUnknown());
957
958 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
959 EdgeProbabilities[Idx] =
960 BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
961 }
962 setEdgeProbability(BB, EdgeProbabilities);
963 return true;
964}
965
966bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
967 const TargetLibraryInfo *TLI) {
968 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
969 if (!BI || !BI->isConditional())
970 return false;
971
972 Value *Cond = BI->getCondition();
973 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
974 if (!CI)
975 return false;
976
977 auto GetConstantInt = [](Value *V) {
978 if (auto *I = dyn_cast<BitCastInst>(V))
979 return dyn_cast<ConstantInt>(I->getOperand(0));
980 return dyn_cast<ConstantInt>(V);
981 };
982
983 Value *RHS = CI->getOperand(1);
984 ConstantInt *CV = GetConstantInt(RHS);
985 if (!CV)
986 return false;
987
988 // If the LHS is the result of AND'ing a value with a single bit bitmask,
989 // we don't have information about probabilities.
990 if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
991 if (LHS->getOpcode() == Instruction::And)
992 if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1)))
993 if (AndRHS->getValue().isPowerOf2())
994 return false;
995
996 // Check if the LHS is the return value of a library function
998 if (TLI)
999 if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
1000 if (Function *CalledFn = Call->getCalledFunction())
1001 TLI->getLibFunc(*CalledFn, Func);
1002
1003 ProbabilityTable::const_iterator Search;
1004 if (Func == LibFunc_strcasecmp ||
1005 Func == LibFunc_strcmp ||
1006 Func == LibFunc_strncasecmp ||
1007 Func == LibFunc_strncmp ||
1008 Func == LibFunc_memcmp ||
1009 Func == LibFunc_bcmp) {
1010 Search = ICmpWithLibCallTable.find(CI->getPredicate());
1011 if (Search == ICmpWithLibCallTable.end())
1012 return false;
1013 } else if (CV->isZero()) {
1014 Search = ICmpWithZeroTable.find(CI->getPredicate());
1015 if (Search == ICmpWithZeroTable.end())
1016 return false;
1017 } else if (CV->isOne()) {
1018 Search = ICmpWithOneTable.find(CI->getPredicate());
1019 if (Search == ICmpWithOneTable.end())
1020 return false;
1021 } else if (CV->isMinusOne()) {
1022 Search = ICmpWithMinusOneTable.find(CI->getPredicate());
1023 if (Search == ICmpWithMinusOneTable.end())
1024 return false;
1025 } else {
1026 return false;
1027 }
1028
1029 setEdgeProbability(BB, Search->second);
1030 return true;
1031}
1032
1033bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
1034 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
1035 if (!BI || !BI->isConditional())
1036 return false;
1037
1038 Value *Cond = BI->getCondition();
1039 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
1040 if (!FCmp)
1041 return false;
1042
1043 ProbabilityList ProbList;
1044 if (FCmp->isEquality()) {
1045 ProbList = !FCmp->isTrueWhenEqual() ?
1046 // f1 == f2 -> Unlikely
1048 // f1 != f2 -> Likely
1050 } else {
1051 auto Search = FCmpTable.find(FCmp->getPredicate());
1052 if (Search == FCmpTable.end())
1053 return false;
1054 ProbList = Search->second;
1055 }
1056
1057 setEdgeProbability(BB, ProbList);
1058 return true;
1059}
1060
1062 Probs.clear();
1063 Handles.clear();
1064}
1065
1068 // Check whether the analysis, all analyses on functions, or the function's
1069 // CFG have been preserved.
1070 auto PAC = PA.getChecker<BranchProbabilityAnalysis>();
1071 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
1072 PAC.preservedSet<CFGAnalyses>());
1073}
1074
1076 OS << "---- Branch Probabilities ----\n";
1077 // We print the probabilities from the last function the analysis ran over,
1078 // or the function it is currently running over.
1079 assert(LastF && "Cannot print prior to running over a function");
1080 for (const auto &BI : *LastF) {
1081 for (const BasicBlock *Succ : successors(&BI))
1082 printEdgeProbability(OS << " ", &BI, Succ);
1083 }
1084}
1085
1087isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
1088 // Hot probability is at least 4/5 = 80%
1089 // FIXME: Compare against a static "hot" BranchProbability.
1090 return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
1091}
1092
1093/// Get the raw edge probability for the edge. If can't find it, return a
1094/// default probability 1/N where N is the number of successors. Here an edge is
1095/// specified using PredBlock and an
1096/// index to the successors.
1099 unsigned IndexInSuccessors) const {
1100 auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
1101 assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
1102 (Probs.end() == I) &&
1103 "Probability for I-th successor must always be defined along with the "
1104 "probability for the first successor");
1105
1106 if (I != Probs.end())
1107 return I->second;
1108
1109 return {1, static_cast<uint32_t>(succ_size(Src))};
1110}
1111
1114 const_succ_iterator Dst) const {
1115 return getEdgeProbability(Src, Dst.getSuccessorIndex());
1116}
1117
1118/// Get the raw edge probability calculated for the block pair. This returns the
1119/// sum of all raw edge probabilities from Src to Dst.
1122 const BasicBlock *Dst) const {
1123 if (!Probs.count(std::make_pair(Src, 0)))
1124 return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src));
1125
1126 auto Prob = BranchProbability::getZero();
1127 for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
1128 if (*I == Dst)
1129 Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second;
1130
1131 return Prob;
1132}
1133
1134/// Set the edge probability for all edges at once.
1136 const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
1137 assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
1138 eraseBlock(Src); // Erase stale data if any.
1139 if (Probs.size() == 0)
1140 return; // Nothing to set.
1141
1142 Handles.insert(BasicBlockCallbackVH(Src, this));
1143 uint64_t TotalNumerator = 0;
1144 for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
1145 this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
1146 LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
1147 << " successor probability to " << Probs[SuccIdx]
1148 << "\n");
1149 TotalNumerator += Probs[SuccIdx].getNumerator();
1150 }
1151
1152 // Because of rounding errors the total probability cannot be checked to be
1153 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1154 // Instead, every single probability in Probs must be as accurate as possible.
1155 // This results in error 1/denominator at most, thus the total absolute error
1156 // should be within Probs.size / BranchProbability::getDenominator.
1157 assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
1158 assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
1159 (void)TotalNumerator;
1160}
1161
1163 BasicBlock *Dst) {
1164 eraseBlock(Dst); // Erase stale data if any.
1165 unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
1166 assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
1167 if (NumSuccessors == 0)
1168 return; // Nothing to set.
1169 if (!this->Probs.contains(std::make_pair(Src, 0)))
1170 return; // No probability is set for edges from Src. Keep the same for Dst.
1171
1172 Handles.insert(BasicBlockCallbackVH(Dst, this));
1173 for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
1174 auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
1175 this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
1176 LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx
1177 << " successor probability to " << Prob << "\n");
1178 }
1179}
1180
1182 assert(Src->getTerminator()->getNumSuccessors() == 2);
1183 if (!Probs.contains(std::make_pair(Src, 0)))
1184 return; // No probability is set for edges from Src
1185 assert(Probs.contains(std::make_pair(Src, 1)));
1186 std::swap(Probs[std::make_pair(Src, 0)], Probs[std::make_pair(Src, 1)]);
1187}
1188
1191 const BasicBlock *Src,
1192 const BasicBlock *Dst) const {
1193 const BranchProbability Prob = getEdgeProbability(Src, Dst);
1194 OS << "edge ";
1195 Src->printAsOperand(OS, false, Src->getModule());
1196 OS << " -> ";
1197 Dst->printAsOperand(OS, false, Dst->getModule());
1198 OS << " probability is " << Prob
1199 << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
1200
1201 return OS;
1202}
1203
1205 LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");
1206
1207 // Note that we cannot use successors of BB because the terminator of BB may
1208 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
1209 // Instead we remove prob data for the block by iterating successors by their
1210 // indices from 0 till the last which exists. There could not be prob data for
1211 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
1212 // set for all successors from 0 to M at once by the method
1213 // setEdgeProbability().
1214 Handles.erase(BasicBlockCallbackVH(BB, this));
1215 for (unsigned I = 0;; ++I) {
1216 auto MapI = Probs.find(std::make_pair(BB, I));
1217 if (MapI == Probs.end()) {
1218 assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
1219 "Must be no more successors");
1220 return;
1221 }
1222 Probs.erase(MapI);
1223 }
1224}
1225
1227 const TargetLibraryInfo *TLI,
1228 DominatorTree *DT,
1229 PostDominatorTree *PDT) {
1230 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
1231 << " ----\n\n");
1232 LastF = &F; // Store the last function we ran on for printing.
1233 LI = &LoopI;
1234
1235 SccI = std::make_unique<SccInfo>(F);
1236
1237 assert(EstimatedBlockWeight.empty());
1238 assert(EstimatedLoopWeight.empty());
1239
1240 std::unique_ptr<DominatorTree> DTPtr;
1241 std::unique_ptr<PostDominatorTree> PDTPtr;
1242
1243 if (!DT) {
1244 DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
1245 DT = DTPtr.get();
1246 }
1247
1248 if (!PDT) {
1249 PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
1250 PDT = PDTPtr.get();
1251 }
1252
1253 computeEestimateBlockWeight(F, DT, PDT);
1254
1255 // Walk the basic blocks in post-order so that we can build up state about
1256 // the successors of a block iteratively.
1257 for (const auto *BB : post_order(&F.getEntryBlock())) {
1258 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
1259 << "\n");
1260 // If there is no at least two successors, no sense to set probability.
1261 if (BB->getTerminator()->getNumSuccessors() < 2)
1262 continue;
1263 if (calcMetadataWeights(BB))
1264 continue;
1265 if (calcEstimatedHeuristics(BB))
1266 continue;
1267 if (calcPointerHeuristics(BB))
1268 continue;
1269 if (calcZeroHeuristics(BB, TLI))
1270 continue;
1271 if (calcFloatingPointHeuristics(BB))
1272 continue;
1273 }
1274
1275 EstimatedLoopWeight.clear();
1276 EstimatedBlockWeight.clear();
1277 SccI.reset();
1278
1279 if (PrintBranchProb && (PrintBranchProbFuncName.empty() ||
1280 F.getName() == PrintBranchProbFuncName)) {
1281 print(dbgs());
1282 }
1283}
1284
1286 AnalysisUsage &AU) const {
1287 // We require DT so it's available when LI is available. The LI updating code
1288 // asserts that DT is also present so if we don't make sure that we have DT
1289 // here, that assert will trigger.
1295 AU.setPreservesAll();
1296}
1297
1299 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1300 const TargetLibraryInfo &TLI =
1301 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1302 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1303 PostDominatorTree &PDT =
1304 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1305 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1306 return false;
1307}
1308
1310
1312 const Module *) const {
1313 BPI.print(OS);
1314}
1315
1316AnalysisKey BranchProbabilityAnalysis::Key;
1319 auto &LI = AM.getResult<LoopAnalysis>(F);
1320 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1321 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1322 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1324 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1325 return BPI;
1326}
1327
1330 OS << "Printing analysis 'Branch Probability Analysis' for function '"
1331 << F.getName() << "':\n";
1333 return PreservedAnalyses::all();
1334}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
BlockExecWeight
Set of dedicated "absolute" execution weights for a block.
@ NORETURN
Weight to a block containing non returning call.
@ UNWIND
Weight to 'unwind' block of an invoke instruction.
@ COLD
Weight to a 'cold' block.
@ ZERO
Special weight used for cases with exact zero probability.
@ UNREACHABLE
Weight to an 'unreachable' block.
@ DEFAULT
Default weight is used in cases when there is no dedicated execution weight set.
@ LOWEST_NON_ZERO
Minimal possible non zero weight.
static const uint32_t FPH_TAKEN_WEIGHT
static const BranchProbability FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const uint32_t LBH_TAKEN_WEIGHT
static const uint32_t ZH_NONTAKEN_WEIGHT
static const ProbabilityTable ICmpWithLibCallTable
strcmp and similar functions return zero, negative, or positive, if the first string is equal,...
static const ProbabilityTable ICmpWithMinusOneTable
Integer compares with -1:
branch prob
static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const ProbabilityTable ICmpWithZeroTable
Integer compares with 0:
static const uint32_t PH_NONTAKEN_WEIGHT
std::map< CmpInst::Predicate, ProbabilityList > ProbabilityTable
static const BranchProbability PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const BranchProbability FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const uint32_t PH_TAKEN_WEIGHT
Heuristics and lookup tables for non-loop branches: Pointer Heuristics (PH)
static const BranchProbability FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
branch Branch Probability Analysis
static const BranchProbability PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
SmallVector< BranchProbability > ProbabilityList
static const ProbabilityTable ICmpWithOneTable
Integer compares with 1:
static const uint32_t ZH_TAKEN_WEIGHT
Zero Heuristics (ZH)
static const uint32_t FPH_NONTAKEN_WEIGHT
static const BranchProbability UR_TAKEN_PROB
Unreachable-terminating branch taken probability.
static const ProbabilityTable FCmpTable
Floating-Point compares:
static const uint32_t LBH_NONTAKEN_WEIGHT
static const ProbabilityTable PointerTable
Pointer comparisons:
static const uint32_t FPH_ORD_WEIGHT
This is the probability for an ordered floating point comparison.
static void computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, SmallPtrSetImpl< const BasicBlock * > &UnlikelyBlocks)
static const uint32_t FPH_UNO_WEIGHT
This is the probability for an unordered floating point comparison, it means one or two of the operan...
cl::opt< std::string > PrintBranchProbFuncName("print-bpi-func-name", cl::Hidden, cl::desc("The option to specify the name of the function " "whose branch probability info is printed."))
static cl::opt< bool > PrintBranchProb("print-bpi", cl::init(false), cl::Hidden, cl::desc("Print the branch probability info."))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static ConstantInt * GetConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
This file defines the SmallVector class.
Value * RHS
Value * LHS
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:47
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:360
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:324
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM)
Run the analysis pass over a function and produce BPI.
Legacy analysis pass which computes BranchProbabilityInfo.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &OS, const Module *M=nullptr) const override
print - Print out the internal state of the pass.
void getSccEnterBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Enters) const
Fills in Enters vector with all such blocks that don't belong to SCC with SccNum ID but there is an e...
void getSccExitBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Exits) const
Fills in Exits vector with all such blocks that don't belong to SCC with SccNum ID but there is an ed...
int getSCCNum(const BasicBlock *BB) const
If BB belongs to some SCC then ID of that SCC is returned, otherwise -1 is returned.
Analysis providing branch probability information.
void eraseBlock(const BasicBlock *BB)
Forget analysis results for the given basic block.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
bool invalidate(Function &, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &)
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void calculate(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI, DominatorTree *DT, PostDominatorTree *PDT)
bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const
Test if an edge is hot relative to other out-edges of the Src.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
void print(raw_ostream &OS) const
raw_ostream & printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const
Print an edge's probability.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static uint32_t getDenominator()
static BranchProbability getRaw(uint32_t N)
static BranchProbability getOne()
static BranchProbability getUnknown()
uint32_t getNumerator() const
static BranchProbability getZero()
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
bool isTrueWhenEqual() const
This is just a convenience.
Definition: InstrTypes.h:1314
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate Pred)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Metadata node.
Definition: Metadata.h:1067
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:49
BlockType
Used as immediate MachineOperands for block signatures.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< po_iterator< T > > post_order(const T &G)
void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator)
Returns the integer nearest(Numerator / Denominator).
Definition: MathExtras.h:433
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
MDNode * getValidBranchWeightMDNode(const Instruction &I)
Get the valid branch weights metadata node.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Mul
Product of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
unsigned succ_size(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:26