doxygen/MisExpect_8cpp_source.html

//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This contains code to emit warnings for potentially incorrect usage of the

// llvm.expect intrinsic. This utility extracts the threshold values from

// metadata associated with the instrumented Branch or Switch instruction. The

// threshold values are then used to determine if a warning should be emitted.

//

// MisExpect's implementation relies on two assumptions about how branch weights

// are managed in LLVM.

//

// 1) Frontend profiling weights are always in place before llvm.expect is

// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore

// needs to extract the branch weights and then compare them to the weights

// being added by the llvm.expect intrinsic lowering.

//

// 2) Sampling and IR based profiles will *only* have branch weight metadata

// before profiling data is consulted if they are from a lowered llvm.expect

// intrinsic. These profiles thus always extract the expected weights and then

// compare them to the weights collected during profiling to determine if a

// diagnostic message is warranted.

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Utils/MisExpect.h"

#include "llvm/ADT/Twine.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/LLVMContext.h"

#include "llvm/IR/ProfDataUtils.h"

#include "llvm/Support/BranchProbability.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/FormatVariadic.h"

#include <algorithm>

#include <cstdint>

#include <functional>

#include <numeric>


#define DEBUG_TYPE "misexpect"


using namespace llvm;

using namespace misexpect;


namespace llvm {


// Command line option to enable/disable the warning when profile data suggests
// a mismatch with the use of the llvm.expect intrinsic.
//
// The option is hidden and defaults to off. isMisExpectDiagEnabled() treats the
// warning as enabled when either this flag is set or the LLVMContext reports
// that the MisExpect warning was requested.

static cl::opt<bool> PGOWarnMisExpect(

    "pgo-warn-misexpect", cl::init(false), cl::Hidden,

    cl::desc("Use this option to turn on/off "

             "warnings about incorrect usage of llvm.expect intrinsics."));


// Command line option for setting the diagnostic tolerance threshold, expressed
// as a percentage (N%). Defaults to 0.
//
// getMisExpectTolerance() uses the larger of this value and the tolerance
// stored on the LLVMContext; verifyMisExpect() then clamps the result to
// [0, 100) before scaling its threshold.

static cl::opt<uint32_t> MisExpectTolerance(

    "misexpect-tolerance", cl::init(0),

    cl::desc("Prevents emitting diagnostics when profile counts are "

             "within N% of the threshold.."));


} // namespace llvm


namespace {


// Returns true when MisExpect warnings should be reported for Ctx: either the
// -pgo-warn-misexpect flag is set, or the context itself reports that the
// warning was requested. The context is only queried when the flag is off.
bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
  if (PGOWarnMisExpect)
    return true;
  return Ctx.getMisExpectWarningRequested();
}


// Returns the effective MisExpect tolerance for Ctx: the larger of the
// -misexpect-tolerance command line value and the tolerance recorded on the
// context.
uint32_t getMisExpectTolerance(LLVMContext &Ctx) {
  uint32_t FlagTolerance = static_cast<uint32_t>(MisExpectTolerance);
  uint32_t ContextTolerance = Ctx.getDiagnosticsMisExpectTolerance();
  return FlagTolerance < ContextTolerance ? ContextTolerance : FlagTolerance;
}


// Picks the instruction used to anchor a MisExpect diagnostic for I.
//
// For a branch or a switch whose condition is itself an Instruction, that
// condition is returned. In every other case (another kind of instruction, or
// a condition that is not an Instruction) I is returned unchanged. I must not
// be null.
Instruction *getInstCondition(Instruction *I) {
  assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");

  if (auto *Branch = dyn_cast<BranchInst>(I)) {
    if (auto *CondInst = dyn_cast<Instruction>(Branch->getCondition()))
      return CondInst;
    return I;
  }

  // TODO: Find a way to resolve condition location for switches.
  // The switch condition often resolves to an earlier point in the program
  // (where the condition value is computed) rather than to the switch's own
  // location in the source code. Using the instruction itself for source
  // locations would improve diagnostic output, such as the caret. If branch
  // instructions have the same problem, this function should be removed and
  // the instruction used directly.
  if (auto *Switch = dyn_cast<SwitchInst>(I)) {
    if (auto *CondInst = dyn_cast<Instruction>(Switch->getCondition()))
      return CondInst;
  }

  return I;
}


// Reports a MisExpect finding for instruction I.
//
// ProfCount is the profiled count for the annotated target and TotalCount the
// count it is measured against; their ratio is printed as a percentage together
// with the raw counts. A DiagnosticInfoMisExpect is sent through Ctx only when
// isMisExpectDiagEnabled(Ctx) holds; the optimization remark is always handed
// to the remark emitter. Both are attached to getInstCondition(I).
void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
                             uint64_t ProfCount, uint64_t TotalCount) {
  double FractionCorrect = static_cast<double>(ProfCount) / TotalCount;
  auto Summary =
      formatv("{0:P} ({1} / {2})", FractionCorrect, ProfCount, TotalCount);
  auto RemarkText = formatv(
      "Potential performance regression from use of the llvm.expect intrinsic: "
      "Annotation was correct on {0} of profiled executions.",
      Summary);

  Instruction *Anchor = getInstCondition(I);

  if (isMisExpectDiagEnabled(Ctx)) {
    // The diagnostic carries only the short "percentage (count / total)" text.
    Twine Msg(Summary);
    Ctx.diagnose(DiagnosticInfoMisExpect(Anchor, Msg));
  }

  OptimizationRemarkEmitter ORE(I->getParent()->getParent());
  ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Anchor)
           << RemarkText.str());
}


} // namespace


namespace llvm {

namespace misexpect {


// Compares profiled branch weights against the weights implied by an
// llvm.expect annotation and emits a MisExpect diagnostic for I when the
// profile falls below the expected threshold.
//
// RealWeights holds the weights taken from the profile, ExpectedWeights the
// weights produced for the llvm.expect annotation. The position of the largest
// expected weight identifies the "likely" target; the profiled weight at that
// same position is what gets checked.
//
// Nothing is checked (and nothing is emitted) when ExpectedWeights is empty or
// when RealWeights has no entry at the likely target's position.
void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
                     ArrayRef<uint32_t> ExpectedWeights) {
  // With no expected weights the scan below would leave the likely weight at 0,
  // so there is no annotation to verify.
  if (ExpectedWeights.empty())
    return;

  // To determine if we emit a diagnostic, we need to compare the branch weights
  // from the profile to those added by the llvm.expect intrinsic.
  // So first, we extract the "likely" and "unlikely" weights from
  // ExpectedWeights and determine the correct weight in the profile to compare
  // against.
  uint64_t LikelyBranchWeight = 0,
           UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
  size_t MaxIndex = 0;
  for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
    uint32_t V = ExpectedWeights[Idx];
    if (LikelyBranchWeight < V) {
      LikelyBranchWeight = V;
      MaxIndex = Idx;
    }
    if (UnlikelyBranchWeight > V) {
      UnlikelyBranchWeight = V;
    }
  }

  // The likely target must have a profiled weight. This rejects an empty
  // RealWeights (which would also make `size() - 1` below wrap around) and a
  // RealWeights that is shorter than the position of the likely target,
  // instead of indexing past the end of the array.
  if (MaxIndex >= RealWeights.size())
    return;

  const uint64_t ProfiledWeight = RealWeights[MaxIndex];
  const uint64_t RealWeightsTotal =
      std::accumulate(RealWeights.begin(), RealWeights.end(),
                      static_cast<uint64_t>(0), std::plus<uint64_t>());
  const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;

  // Every target other than the likely one is assumed to carry the unlikely
  // weight.
  uint64_t TotalBranchWeight =
      LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);

  // Failing this assert means that we have corrupted metadata.
  assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) &&
         "TotalBranchWeight is less than the Likely branch weight");

  // To determine our threshold value we need to obtain the branch probability
  // for the weights added by llvm.expect and use that proportion to calculate
  // our threshold based on the collected profile data.
  auto LikelyProbability = BranchProbability::getBranchProbability(
      LikelyBranchWeight, TotalBranchWeight);

  uint64_t ScaledThreshold = LikelyProbability.scale(RealWeightsTotal);

  // Clamp tolerance range to [0, 100).
  auto Tolerance = getMisExpectTolerance(I.getContext());
  Tolerance = std::clamp(Tolerance, 0u, 99u);

  // Allow users to relax checking by N%, i.e., if they use a 5% tolerance,
  // then we check against 0.95*ScaledThreshold.
  if (Tolerance > 0)
    ScaledThreshold *= (1.0 - Tolerance / 100.0);

  // When the profile weight is below the threshold, we emit the diagnostic.
  if (ProfiledWeight < ScaledThreshold)
    emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
                            RealWeightsTotal);
}


// Checks profile weights (RealWeights) against the llvm.expect weights already
// attached to I, for profiles applied in the backend.
//
// Backend checking assumes any existing weight comes from an `llvm.expect`
// intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple
// times, which invalidates that assumption. The check is therefore limited to
// branch weights that carry the "!expected" field, since those are guaranteed
// to have been added by the LowerExpectIntrinsic pass. Nothing happens when the
// field is absent or the weights cannot be extracted from I.
void checkBackendInstrumentation(Instruction &I,
                                 const ArrayRef<uint32_t> RealWeights) {
  if (hasBranchWeightOrigin(I)) {
    SmallVector<uint32_t> AnnotatedWeights;
    if (extractBranchWeights(I, AnnotatedWeights))
      verifyMisExpect(I, RealWeights, AnnotatedWeights);
  }
}


// Checks the llvm.expect weights (ExpectedWeights) against the branch weights
// already attached to I, which in the frontend flow are the profiled weights.
// Nothing happens when no branch weights can be extracted from I.
void checkFrontendInstrumentation(Instruction &I,
                                  const ArrayRef<uint32_t> ExpectedWeights) {
  SmallVector<uint32_t> ProfiledWeights;
  const bool HasProfile = extractBranchWeights(I, ProfiledWeights);
  if (HasProfile)
    verifyMisExpect(I, ProfiledWeights, ExpectedWeights);
}


// Entry point that forwards to the frontend or the backend check.
//
// With IsFrontend set, ExistingWeights is passed to
// checkFrontendInstrumentation(), where it is treated as the expected weights;
// otherwise it is passed to checkBackendInstrumentation(), where it is treated
// as the profiled weights.
void checkExpectAnnotations(Instruction &I,
                            const ArrayRef<uint32_t> ExistingWeights,
                            bool IsFrontend) {
  if (!IsFrontend) {
    checkBackendInstrumentation(I, ExistingWeights);
    return;
  }
  checkFrontendInstrumentation(I, ExistingWeights);
}


} // namespace misexpect

} // namespace llvm

#undef DEBUG_TYPE

BranchProbability.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CommandLine.h

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:353

DiagnosticInfo.h

End
bool End
Definition: ELF_riscv.cpp:480

FormatVariadic.h

Instruction.h

Instructions.h

LLVMContext.h

UnlikelyBranchWeight
static cl::opt< uint32_t > UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(1), cl::desc("Weight of the branch unlikely to be taken (default = 1)"))

LikelyBranchWeight
static cl::opt< uint32_t > LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(2000), cl::desc("Weight of the branch likely to be taken (default = 2000)"))

I
#define I(x, y, z)
Definition: MD5.cpp:58

DEBUG_TYPE
#define DEBUG_TYPE
Definition: MisExpect.cpp:46

MisExpect.h

OptimizationRemarkEmitter.h

ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition: RISCVRedundantCopyElimination.cpp:75

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

Twine.h

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:157

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168

llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:156

llvm::BranchProbability::getBranchProbability
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition: BranchProbability.cpp:53

llvm::DiagnosticInfoMisExpect
Diagnostic information for MisExpect analysis.
Definition: DiagnosticInfo.h:1125

llvm::Instruction
Definition: Instruction.h:68

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::LLVMContext::getMisExpectWarningRequested
bool getMisExpectWarningRequested() const
Definition: LLVMContext.cpp:146

llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:245

llvm::LLVMContext::getDiagnosticsMisExpectTolerance
uint32_t getDiagnosticsMisExpectTolerance() const
Definition: LLVMContext.cpp:156

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:32

llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:762

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::cl::opt
Definition: CommandLine.h:1423

uint32_t

uint64_t

llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:117

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm::misexpect::checkFrontendInstrumentation
void checkFrontendInstrumentation(Instruction &I, const ArrayRef< uint32_t > ExpectedWeights)
checkFrontendInstrumentation - compares PGO counters to the thresholds used for llvm....
Definition: MisExpect.cpp:194

llvm::misexpect::checkExpectAnnotations
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202

llvm::misexpect::checkBackendInstrumentation
void checkBackendInstrumentation(Instruction &I, const llvm::ArrayRef< uint32_t > RealWeights)
checkBackendInstrumentation - compares PGO counters to the thresholds used for llvm....
Definition: MisExpect.cpp:179

llvm::misexpect::verifyMisExpect
void verifyMisExpect(Instruction &I, ArrayRef< uint32_t > RealWeights, const ArrayRef< uint32_t > ExpectedWeights)
verifyMisExpect - compares RealWeights to the thresholds used for llvm.expect and warns if the PGO c...
Definition: MisExpect.cpp:122

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::PGOWarnMisExpect
static cl::opt< bool > PGOWarnMisExpect("pgo-warn-misexpect", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " "warnings about incorrect usage of llvm.expect intrinsics."))

llvm::hasBranchWeightOrigin
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
Definition: ProfDataUtils.cpp:122

llvm::formatv
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
Definition: FormatVariadic.h:252

llvm::MisExpectTolerance
static cl::opt< uint32_t > MisExpectTolerance("misexpect-tolerance", cl::init(0), cl::desc("Prevents emitting diagnostics when profile counts are " "within N% of the threshold.."))

llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition: ProfDataUtils.cpp:170

llvm::cl::desc
Definition: CommandLine.h:409