LLVM 20.0.0git
DevelopmentModeInlineAdvisor.cpp
Go to the documentation of this file.
1//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a model runner using TFLite, allowing the
10// loading of a model from a command line option.
11//
12//===----------------------------------------------------------------------===//
14#include "llvm/Config/config.h"
15#if defined(LLVM_HAVE_TFLITE)
16
17#include "llvm/ADT/BitVector.h"
25#include "llvm/IR/LLVMContext.h"
26#include "llvm/IR/Module.h"
29
30#include <vector>
31#include <optional>
32
33using namespace llvm;
34
35static cl::opt<std::string> TrainingLog(
36 "training-log", cl::Hidden,
37 cl::desc("Path where the development - mode inlining log is saved."));
38
39static cl::opt<std::string> TFModelUnderTrainingPath(
40 "ml-inliner-model-under-training", cl::Hidden,
41 cl::desc(R"(Path to SavedModel from the previous training iteration.
42The directory is also expected to contain a JSON specification of the
43outputs expected to be logged, where the first entry must be the
44inlining decision. The file containing the specification should be
45called output_spec.json. The expected JSON value is an array of
46dictionaries. Each dictionary should have 2 keys:
47
48- "tensor_spec, followed by the TensorSpec description of the
49output; and
50- "logging_name", a string indicating the name to use when
51logging the output values.
52
53Example:
54[
55 {
56 "logging_name" : "some_name",
57 "tensor_spec" : {
58 "name" : "model_name",
59 "port" : 0,
60 "shape" : [2, 3],
61 "type" : "float"
62 }
63 }
64]
65
66The first value must always correspond to the decision.)"));
67
68static cl::opt<std::string> TFOutputSpecOverride(
69 "ml-inliner-output-spec-override", cl::Hidden,
70 cl::desc("Override the path to the output spec json file. See "
71 "-ml-inliner-model-under-training documentation for the "
72 "specification of that file."));
73
74static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
75 cl::Hidden, cl::init("action_"),
76 cl::desc("Prefix for feature names."));
77
78namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// The decision the default (heuristic) policy would have made.
  int64_t DefaultDecision = 0;

  /// The decision we actually advised. When collecting logs for the default
  /// policy, this matches DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// Whether inlining actually took place: 'false' when an inline error
  /// occurred, even if AdvisedDecision was 'true'; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// The size change attributed to the event: size_after - size_before.
  int64_t Reward = 0;
};
96
97/// Collect data we may use for training a model.
98class TrainingLogger final {
99public:
100 TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);
101
102 /// Log one inlining event.
103 void logInlineEvent(const InlineEvent &Event,
104 const MLModelRunner &ModelRunner);
105
106private:
107 StringRef LogFileName;
108 const ModelUnderTrainingRunner *const MUTR;
109 std::unique_ptr<Logger> L;
110 BitVector Effects;
111 /// Set these 2 clearly OOB, to make sure we set them later.
112 size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
113 size_t DecisionPos = std::numeric_limits<size_t>::max();
114};
115
116/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
117/// the offline training scenario. Note that training happens outside of the
118/// compiler, this facility is concerned with producing training data ("logs").
119/// This InlineAdvisor can operate in the following modes:
120///
121/// 1) collect logs for the default policy. This is useful for bootstrapping
122/// training, which will be considerably faster by starting from a reasonable
123/// policy.
124///
125/// 2) collect logs for the ML policy, using a model from a previous
126/// training. Potentially, that model uses internally some small random
127/// perturbation of its weights, to induce exploration (setting this up is the
128/// responsibility of the training algorithm). The logs would then be used to
129/// retrain and improve on this model.
130///
131/// 3) use the provided model, with no logging. This is useful for end to end
132/// validation - the model, in this case, is a release candidate and shouldn't
133/// have random perturbations. It is a convenience feature: rather than needing
134/// to take the release candidate model and compile it in 'release' mode,
135/// validate it, then potentially discard it, it's easier to just pass the model
136/// to the compiler, albeit compilation would be slower, as a one-off. Once the
137/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
138/// release mode. The expectation is that a well-trained model provides a good
139/// policy over a sufficiently diverse codebase, over many changes (i.e.
140/// training happens seldom).
141class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
142public:
143 DevelopmentModeMLInlineAdvisor(
145 std::unique_ptr<MLModelRunner> ModelRunner,
146 std::function<bool(CallBase &)> GetDefaultAdvice,
147 std::unique_ptr<TrainingLogger> Logger);
148
149 size_t getTotalSizeEstimate();
150
151 void updateNativeSizeEstimate(int64_t Change) {
152 *CurrentNativeSize += Change;
153 }
154 void resetNativeSize(Function *F) {
155 PreservedAnalyses PA = PreservedAnalyses::all();
157 FAM.invalidate(*F, PA);
158 }
159
160 std::unique_ptr<MLInlineAdvice>
161 getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
162
163 std::optional<size_t> getNativeSizeEstimate(const Function &F) const;
164
165private:
166 bool isLogging() const { return !!Logger; }
167 std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;
168
169 const bool IsDoingInference;
170 std::unique_ptr<TrainingLogger> Logger;
171
172 const std::optional<int32_t> InitialNativeSize;
173 std::optional<int32_t> CurrentNativeSize;
174};
175
176/// A variant of MLInlineAdvice that tracks all non-trivial inlining
177/// decisions, for training/logging.
178class LoggingMLInlineAdvice : public MLInlineAdvice {
179public:
180 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
181 OptimizationRemarkEmitter &ORE, bool Recommendation,
182 TrainingLogger &Logger,
183 std::optional<size_t> CallerSizeEstimateBefore,
184 std::optional<size_t> CalleeSizeEstimateBefore,
185 bool DefaultDecision, bool Mandatory = false)
186 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
187 CallerSizeEstimateBefore(CallerSizeEstimateBefore),
188 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
189 DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
190
191 virtual ~LoggingMLInlineAdvice() = default;
192
193private:
194 DevelopmentModeMLInlineAdvisor *getAdvisor() const {
195 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
196 }
197 void recordInliningImpl() override {
198 MLInlineAdvice::recordInliningImpl();
199 getAdvisor()->resetNativeSize(Caller);
200 int Reward = std::numeric_limits<int>::max();
201 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
202 !getAdvisor()->isForcedToStop()) {
203 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
204 *CalleeSizeEstimateBefore;
205 Reward = NativeSizeAfter -
206 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
207 getAdvisor()->updateNativeSizeEstimate(Reward);
208 }
209 log(Reward, /*Success=*/true);
210 }
211
212 void recordInliningWithCalleeDeletedImpl() override {
213 MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
214 getAdvisor()->resetNativeSize(Caller);
215 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
216 !getAdvisor()->isForcedToStop()) {
217 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
218 int Reward = NativeSizeAfter -
219 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
220 getAdvisor()->updateNativeSizeEstimate(Reward);
221 log(Reward, /*Success=*/true);
222 } else {
223 log(NoReward, /*Success=*/true);
224 }
225 }
226
227 void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
228 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
229 log(NoReward, /*Success=*/false);
230 }
231
232 void recordUnattemptedInliningImpl() override {
233 MLInlineAdvice::recordUnattemptedInliningImpl();
234 log(NoReward, /*Success=*/false);
235 }
236
237 void log(int64_t Reward, bool Success) {
238 if (Mandatory)
239 return;
240 InlineEvent Event;
241 Event.AdvisedDecision = isInliningRecommended();
242 Event.DefaultDecision = DefaultDecision;
243 Event.Effect = Success;
244 Event.Reward = Reward;
245 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
246 }
247
248 static const int64_t NoReward = 0;
249 TrainingLogger &Logger;
250 const std::optional<size_t> CallerSizeEstimateBefore;
251 const std::optional<size_t> CalleeSizeEstimateBefore;
252 const int64_t DefaultDecision;
253 const int64_t Mandatory;
254};
255
256static const std::vector<TensorSpec> TrainingOnlyFeatures{
257 TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
258 TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
259 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
260
261static const std::vector<TensorSpec> getInputFeatures() {
262 std::vector<TensorSpec> InputSpecs;
263 for (size_t I = 0; I < NumberOfFeatures; ++I)
264 InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
265 TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
266 append_range(InputSpecs, TrainingOnlyFeatures);
267 return InputSpecs;
268}
269
270} // namespace
271
272TrainingLogger::TrainingLogger(StringRef LogFileName,
273 const ModelUnderTrainingRunner *MUTR)
274 : LogFileName(LogFileName), MUTR(MUTR) {
275 // The first output is the inlining decision.
276 std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());
277
278 if (MUTR)
279 append_range(FT, MUTR->extraOutputsForLoggingSpecs());
280
281 DefaultDecisionPos = FT.size();
282 FT.push_back(DefaultDecisionSpec);
283
284 DecisionPos = FT.size();
285 FT.push_back(InlineDecisionSpec);
286 std::error_code EC;
287 auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
288 if (EC)
289 dbgs() << (EC.message() + ":" + TrainingLog);
290
291 L = std::make_unique<Logger>(
292 std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
293 InlineSizeEstimatorAnalysis::isEvaluatorRequested());
294 L->switchContext("");
295}
296
297/// Log one inlining event.
298void TrainingLogger::logInlineEvent(const InlineEvent &Event,
299 const MLModelRunner &ModelRunner) {
300 L->startObservation();
301 size_t CurrentFeature = 0;
302 for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
303 L->logTensorValue(CurrentFeature,
304 reinterpret_cast<const char *>(
305 ModelRunner.getTensorUntyped(CurrentFeature)));
306
307 if (MUTR)
308 for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
309 const char *RawData =
310 reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
311 L->logTensorValue(CurrentFeature, RawData);
312 ++CurrentFeature;
313 }
314
315 assert(CurrentFeature == DefaultDecisionPos);
316 L->logTensorValue(DefaultDecisionPos,
317 reinterpret_cast<const char *>(&Event.DefaultDecision));
318 L->logTensorValue(DecisionPos,
319 reinterpret_cast<const char *>(&Event.AdvisedDecision));
320 L->endObservation();
321 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
322 L->logReward(Event.Reward);
323
324 // For debugging / later use
325 Effects.push_back(Event.Effect);
326}
327
328DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
330 std::unique_ptr<MLModelRunner> ModelRunner,
331 std::function<bool(CallBase &)> GetDefaultAdvice,
332 std::unique_ptr<TrainingLogger> Logger)
333 : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
334 IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
336 InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
337 CurrentNativeSize(InitialNativeSize) {
338 // We cannot have the case of neither inference nor logging.
339 assert(IsDoingInference || isLogging());
340}
341
342std::optional<size_t>
343DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
344 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
345 return std::nullopt;
346 auto &R =
347 FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
348 if (!R) {
349 F.getParent()->getContext().emitError(
350 "Native size estimator is not present.");
351 return 0;
352 }
353 return *R;
354}
355
356std::unique_ptr<MLInlineAdvice>
357DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
358 return std::make_unique<LoggingMLInlineAdvice>(
359 /*Advisor=*/this,
360 /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
361 /*Logger=*/*Logger,
362 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
363 /*CalleeSizeEstimateBefore=*/
364 getNativeSizeEstimate(*CB.getCalledFunction()),
365 /*DefaultDecision=*/true, /*Mandatory*/ true);
366}
367
368std::unique_ptr<MLInlineAdvice>
369DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
371 if (IsDoingInference && !isLogging())
372 return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
373
374 bool DefaultAdvice = GetDefaultAdvice(CB);
375 auto Recommendation =
376 IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
377 : DefaultAdvice;
378 return std::make_unique<LoggingMLInlineAdvice>(
379 /*Advisor=*/this,
380 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
381 /*Logger=*/*Logger,
382 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
383 /*CalleeSizeEstimateBefore=*/
384 getNativeSizeEstimate(*CB.getCalledFunction()),
385 /*DefaultDecision=*/DefaultAdvice);
386}
387
388size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
389 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
390 return 0;
391 size_t Ret = 0;
392 for (auto &F : M) {
393 if (F.isDeclaration())
394 continue;
395 Ret += *getNativeSizeEstimate(F);
396 }
397 return Ret;
398}
399
400std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
402 std::function<bool(CallBase &)> GetDefaultAdvice) {
403 auto &Ctx = M.getContext();
404 std::unique_ptr<MLModelRunner> Runner;
405 if (TFModelUnderTrainingPath.empty())
406 Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
407 else
408 Runner = ModelUnderTrainingRunner::createAndEnsureValid(
409 Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
410 TFOutputSpecOverride);
411 if (!Runner)
412 return nullptr;
413 std::unique_ptr<TrainingLogger> Logger;
414 if (!TrainingLog.empty())
415 Logger = std::make_unique<TrainingLogger>(
416 TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));
417
418 return std::make_unique<DevelopmentModeMLInlineAdvisor>(
419 M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
420}
421#endif // defined(LLVM_HAVE_TFLITE)
#define Success
This file implements the BitVector class.
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool for interprocedural optimization.
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define DecisionName
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
Definition: InstrTypes.h:1349
Function * getCaller()
Helper to get the caller (the parent function).
InlineResult is basically true or false.
Definition: InlineCost.h:179
Logging utility - given an ordered specification of features, and assuming a scalar reward,...
InlineAdvice that tracks changes post inlining.
MLModelRunner interface: abstraction of a mechanism for evaluating a ML model.
Definition: MLModelRunner.h:26
void * getTensorUntyped(size_t Index)
Definition: MLModelRunner.h:47
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
void abandon()
Mark an analysis as abandoned.
Definition: Analysis.h:164
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
constexpr size_t NumberOfFeatures
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
const std::vector< TensorSpec > FeatureMap
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition: Casting.h:548
std::unique_ptr< InlineAdvisor > getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM, std::function< bool(CallBase &)> GetDefaultAdvice)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858