LLVM 22.0.0git
MisExpect.cpp
Go to the documentation of this file.
1//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit warnings for potentially incorrect usage of the
10// llvm.expect intrinsic. This utility extracts the threshold values from
11// metadata associated with the instrumented Branch or Switch instruction. The
12// threshold values are then used to determine if a warning should be emitted.
13//
14// MisExpect's implementation relies on two assumptions about how branch weights
15// are managed in LLVM.
16//
17// 1) Frontend profiling weights are always in place before llvm.expect is
18// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
19// needs to extract the branch weights and then compare them to the weights
20// being added by the llvm.expect intrinsic lowering.
21//
22// 2) Sampling and IR based profiles will *only* have branch weight metadata
23// before profiling data is consulted if they are from a lowered llvm.expect
24// intrinsic. These profiles thus always extract the expected weights and then
25// compare them to the weights collected during profiling to determine if a
26// diagnostic message is warranted.
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/Instruction.h"
36#include "llvm/IR/LLVMContext.h"
41#include <algorithm>
42#include <cstdint>
43#include <functional>
44#include <numeric>
45
46#define DEBUG_TYPE "misexpect"
47
48using namespace llvm;
49using namespace misexpect;
50
51// Command line option to enable/disable the warning when profile data suggests
52// a mismatch with the use of the llvm.expect intrinsic
54 "pgo-warn-misexpect", cl::init(false), cl::Hidden,
55 cl::desc("Use this option to turn on/off "
56 "warnings about incorrect usage of llvm.expect intrinsics."));
57
58// Command line option for setting the diagnostic tolerance threshold
60 "misexpect-tolerance", cl::init(0),
61 cl::desc("Prevents emitting diagnostics when profile counts are "
62 "within N% of the threshold.."));
63
64static bool isMisExpectDiagEnabled(const LLVMContext &Ctx) {
65 return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
66}
67
69 return std::max(static_cast<uint32_t>(MisExpectTolerance),
70 Ctx.getDiagnosticsMisExpectTolerance());
71}
72
74 assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
75 const Instruction *Ret = nullptr;
76 if (auto *B = dyn_cast<BranchInst>(I)) {
77 Ret = dyn_cast<Instruction>(B->getCondition());
78 }
79 // TODO: Find a way to resolve condition location for switches
80 // Using the condition of the switch seems to often resolve to an earlier
81 // point in the program, i.e. the calculation of the switch condition, rather
82 // than the switch's location in the source code. Thus, we should use the
83 // instruction to get source code locations rather than the condition to
84 // improve diagnostic output, such as the caret. If the same problem exists
85 // for branch instructions, then we should remove this function and directly
86 // use the instruction
87 //
88 else if (auto *S = dyn_cast<SwitchInst>(I)) {
89 Ret = dyn_cast<Instruction>(S->getCondition());
90 }
91 return Ret ? Ret : I;
92}
93
95 uint64_t ProfCount, uint64_t TotalCount) {
96 double PercentageCorrect = (double)ProfCount / TotalCount;
97 auto PerString =
98 formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
99 auto RemStr = formatv(
100 "Potential performance regression from use of the llvm.expect intrinsic: "
101 "Annotation was correct on {0} of profiled executions.",
102 PerString);
104 if (isMisExpectDiagEnabled(Ctx))
105 Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Twine(PerString)));
106 OptimizationRemarkEmitter ORE(I->getParent()->getParent());
107 ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
108}
109
111 ArrayRef<uint32_t> RealWeights,
112 ArrayRef<uint32_t> ExpectedWeights) {
113 // To determine if we emit a diagnostic, we need to compare the branch weights
114 // from the profile to those added by the llvm.expect intrinsic.
115 // So first, we extract the "likely" and "unlikely" weights from
116 // ExpectedWeights And determine the correct weight in the profile to compare
117 // against.
119 UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
120 size_t MaxIndex = 0;
121 for (const auto &[Idx, V] : enumerate(ExpectedWeights)) {
122 if (LikelyBranchWeight < V) {
124 MaxIndex = Idx;
125 }
126 if (UnlikelyBranchWeight > V)
128 }
129
130 const uint64_t ProfiledWeight = RealWeights[MaxIndex];
131 const uint64_t RealWeightsTotal =
132 std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
133 std::plus<uint64_t>());
134 const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;
135
136 uint64_t TotalBranchWeight =
137 LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
138
139 // Failing this assert means that we have corrupted metadata.
140 assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) &&
141 "TotalBranchWeight is less than the Likely branch weight");
142
143 // To determine our threshold value we need to obtain the branch probability
144 // for the weights added by llvm.expect and use that proportion to calculate
145 // our threshold based on the collected profile data.
146 auto LikelyProbablilty = BranchProbability::getBranchProbability(
147 LikelyBranchWeight, TotalBranchWeight);
148
149 uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);
150
151 // clamp tolerance range to [0, 100)
152 uint32_t Tolerance = getMisExpectTolerance(I.getContext());
153 Tolerance = std::clamp(Tolerance, 0u, 99u);
154
155 // Allow users to relax checking by N% i.e., if they use a 5% tolerance,
156 // then we check against 0.95*ScaledThreshold
157 if (Tolerance > 0)
158 ScaledThreshold *= (1.0 - Tolerance / 100.0);
159
160 // When the profile weight is below the threshold, we emit the diagnostic
161 if (ProfiledWeight < ScaledThreshold)
162 emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
163 RealWeightsTotal);
164}
165
167 ArrayRef<uint32_t> RealWeights) {
168 // Backend checking assumes any existing weight comes from an `llvm.expect`
169 // intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple
170 // times, leading to an invalid assumption in our checking. Backend checks
171 // should only operate on branch weights that carry the "!expected" field,
172 // since they are guaranteed to be added by the LowerExpectIntrinsic pass.
174 return;
175 SmallVector<uint32_t> ExpectedWeights;
176 if (!extractBranchWeights(I, ExpectedWeights))
177 return;
178 verifyMisExpect(I, RealWeights, ExpectedWeights);
179}
180
182 const Instruction &I, ArrayRef<uint32_t> ExpectedWeights) {
183 SmallVector<uint32_t> RealWeights;
184 if (!extractBranchWeights(I, RealWeights))
185 return;
186 verifyMisExpect(I, RealWeights, ExpectedWeights);
187}
188
190 ArrayRef<uint32_t> ExistingWeights,
191 bool IsFrontend) {
192 if (IsFrontend)
193 checkFrontendInstrumentation(I, ExistingWeights);
194 else
195 checkBackendInstrumentation(I, ExistingWeights);
196}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
static cl::opt< uint32_t > UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(1), cl::desc("Weight of the branch unlikely to be taken (default = 1)"))
static cl::opt< uint32_t > LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(2000), cl::desc("Weight of the branch likely to be taken (default = 2000)"))
#define I(x, y, z)
Definition MD5.cpp:58
static const Instruction * getInstCondition(const Instruction *I)
Definition MisExpect.cpp:73
static void emitMisexpectDiagnostic(const Instruction *I, LLVMContext &Ctx, uint64_t ProfCount, uint64_t TotalCount)
Definition MisExpect.cpp:94
static uint32_t getMisExpectTolerance(const LLVMContext &Ctx)
Definition MisExpect.cpp:68
static cl::opt< bool > PGOWarnMisExpect("pgo-warn-misexpect", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " "warnings about incorrect usage of llvm.expect intrinsics."))
static cl::opt< uint32_t > MisExpectTolerance("misexpect-tolerance", cl::init(0), cl::desc("Prevents emitting diagnostics when profile counts are " "within N% of the threshold.."))
static bool isMisExpectDiagEnabled(const LLVMContext &Ctx)
Definition MisExpect.cpp:64
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Diagnostic information for MisExpect analysis.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
initializer< Ty > init(const Ty &Val)
void checkBackendInstrumentation(const Instruction &I, ArrayRef< uint32_t > RealWeights)
checkBackendInstrumentation - compares PGO counters to the thresholds used for llvm....
void verifyMisExpect(const Instruction &I, ArrayRef< uint32_t > RealWeights, ArrayRef< uint32_t > ExpectedWeights)
verifyMisExpect - compares RealWeights to the thresholds used for llvm.expect and warns if the PGO c...
void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
void checkFrontendInstrumentation(const Instruction &I, ArrayRef< uint32_t > ExpectedWeights)
checkFrontendInstrumentation - compares PGO counters to the thresholds used for llvm....
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.