LLVM 20.0.0git
InlineSizeEstimatorAnalysis.cpp
Go to the documentation of this file.
1//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements feature and label extraction for offline supervised learning
10// of a IR to native size model.
11//
12//===----------------------------------------------------------------------===//
14
15#ifdef LLVM_HAVE_TFLITE
17#endif
18#include "llvm/IR/Function.h"
19#include "llvm/IR/PassManager.h"
21
22using namespace llvm;
23
25
26#ifdef LLVM_HAVE_TFLITE
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Dominators.h"
35#include <algorithm>
36#include <deque>
37#include <optional>
38
39cl::opt<std::string> TFIR2NativeModelPath(
40 "ml-inliner-ir2native-model", cl::Hidden,
41 cl::desc("Path to saved model evaluating native size from IR."));
42
43#define DEBUG_TYPE "inline-size-estimator"
44namespace {
45unsigned getMaxInstructionID() {
46#define LAST_OTHER_INST(NR) return NR;
47#include "llvm/IR/Instruction.def"
48}
49
50class IRToNativeSizeLearning {
51public:
52 enum class NamedFeatureIndex : size_t {
53 InitialSize,
54 Blocks,
55 Calls,
56 IsLocal,
57 IsLinkOnceODR,
58 IsLinkOnce,
59 Loops,
60 MaxLoopDepth,
61 MaxDomTreeLevel,
62
63 NumNamedFeatures
64 };
65 static const size_t NumNamedFeatures =
66 static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
67 struct FunctionFeatures {
68 static const size_t FeatureCount;
69
70 std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
71 std::vector<int32_t> InstructionHistogram;
72 std::vector<int32_t> InstructionPairHistogram;
73
74 void fillTensor(int32_t *Ptr) const;
75 int32_t &operator[](NamedFeatureIndex Pos) {
76 return NamedFeatures[static_cast<size_t>(Pos)];
77 }
78 };
79 IRToNativeSizeLearning() = default;
80
81 static FunctionFeatures getFunctionFeatures(Function &F,
83};
84
85// This is a point in time - we determined including these pairs of
86// consecutive instructions (in the IR layout available at inline time) as
87// features improves the model performance. We want to move away from manual
88// feature selection.
89// The array is given in opcode pairs rather than labels because 1) labels
90// weren't readily available, and 2) the successions were hand - extracted.
91//
92// This array must be sorted.
93static const std::array<std::pair<size_t, size_t>, 137>
94 ImportantInstructionSuccessions{
95 {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
96 {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
97 {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
98 {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
99 {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
100 {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
101 {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
102 {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
103 {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
104 {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
105 {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
106 {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
107 {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
108 {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
109 {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
110 {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
111 {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
112 {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
113 {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
114 {64, 1}, {64, 64}, {65, 1}, {65, 65}}};
115
116// We have: 9 calculated features (the features here); 1 feature for each
117// instruction opcode; and 1 feature for each manually-identified sequence.
118// For the latter 2, we build a histogram: we count the number of
119// occurrences of each instruction opcode or succession of instructions,
120// respectively.
121// Note that instruction opcodes start from 1. For convenience, we also have an
122// always 0 feature for the '0' opcode, hence the extra 1.
123const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
124 ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
125 IRToNativeSizeLearning::NumNamedFeatures;
126
128 size_t Ret = 0;
129 for (const auto &BB : F)
130 for (const auto &I : BB)
132 &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
133 return Ret;
134}
135
138 return getSize(F, TTI);
139}
140
141unsigned getMaxDominatorTreeDepth(const Function &F,
142 const DominatorTree &Tree) {
143 unsigned Ret = 0;
144 for (const auto &BB : F)
145 if (const auto *TN = Tree.getNode(&BB))
146 Ret = std::max(Ret, TN->getLevel());
147 return Ret;
148}
149} // namespace
150
151IRToNativeSizeLearning::FunctionFeatures
152IRToNativeSizeLearning::getFunctionFeatures(Function &F,
154 assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
155 "expected function features are sorted");
156
157 auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
158 FunctionFeatures FF;
159 size_t InstrCount = getMaxInstructionID() + 1;
160 FF.InstructionHistogram.resize(InstrCount);
161
162 FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
163
164 int StartID = 0;
165 int LastID = StartID;
166 auto getPairIndex = [](size_t a, size_t b) {
167 auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
168 if (I == ImportantInstructionSuccessions.end())
169 return -1;
170 return static_cast<int>(
171 std::distance(ImportantInstructionSuccessions.begin(), I));
172 };
173
174 // We don't want debug calls, because they'd just add noise.
175 for (const auto &BB : F) {
176 for (const auto &I : BB.instructionsWithoutDebug()) {
177 auto ID = I.getOpcode();
178
179 ++FF.InstructionHistogram[ID];
180 int PairIndex = getPairIndex(LastID, ID);
181 if (PairIndex >= 0)
182 ++FF.InstructionPairHistogram[PairIndex];
183 LastID = ID;
184 if (isa<CallBase>(I))
185 ++FF[NamedFeatureIndex::Calls];
186 }
187 }
188
189 FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
190 FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
191 FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
192 FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
193 FF[NamedFeatureIndex::Blocks] = F.size();
194 auto &LI = FAM.getResult<LoopAnalysis>(F);
195 FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
196 for (auto &L : LI)
197 FF[NamedFeatureIndex::MaxLoopDepth] =
198 std::max(FF[NamedFeatureIndex::MaxLoopDepth],
199 static_cast<int32_t>(L->getLoopDepth()));
200 FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
201 return FF;
202}
203
204void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
205 std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
206 Ptr += NamedFeatures.size();
207 std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
208 Ptr += InstructionHistogram.size();
209 std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
210 Ptr);
211}
212
214 return !TFIR2NativeModelPath.empty();
215}
216
218 if (!isEvaluatorRequested()) {
219 return;
220 }
221 std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
222 "serving_default_input_1",
223 {1, static_cast<int64_t>(
224 IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
225 std::vector<TensorSpec> OutputSpecs{
226 TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
227 Evaluator = std::make_unique<TFModelEvaluator>(
228 TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
229 if (!Evaluator || !Evaluator->isValid()) {
230 Evaluator.reset();
231 return;
232 }
233}
234
238 if (!Evaluator)
239 return std::nullopt;
240 auto Features = IRToNativeSizeLearning::getFunctionFeatures(
241 const_cast<Function &>(F), FAM);
242 int32_t *V = Evaluator->getInput<int32_t>(0);
243 Features.fillTensor(V);
244 auto ER = Evaluator->evaluate();
245 if (!ER)
246 return std::nullopt;
247 float Ret = *ER->getTensorValue<float>(0);
248 if (Ret < 0.0)
249 Ret = 0.0;
250 return static_cast<size_t>(Ret);
251}
252
257
258#else
259namespace llvm {
261} // namespace llvm
263InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
269 return std::nullopt;
270}
272#endif
273
277 OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
278 << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
279 return PreservedAnalyses::all();
280}
static unsigned InstrCount
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This pass exposes codegen information to IR-level passes.
static unsigned getSize(unsigned Kind)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:37
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Result run(const Function &F, FunctionAnalysisManager &FAM)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1909
@ Other
Any other memory.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1856
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28