InlineSizeEstimatorAnalysis.cpp
//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of an IR-to-native-size model.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"

#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

AnalysisKey InlineSizeEstimatorAnalysis::Key;

#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <deque>

cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));

#define DEBUG_TYPE "inline-size-estimator"
namespace {
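// Instruction.def invokes LAST_OTHER_INST(NR) with the numerically largest
// opcode, so with the macro below expanding to "return NR;", including the
// .def file makes this helper return the maximum opcode ID. That value sizes
// the per-opcode histogram used as a feature.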
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}

class IRToNativeSizeLearning {
public:
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  struct FunctionFeatures {
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    void fillTensor(int32_t *Ptr) const;
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);
};

// This is a point in time - we determined that including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
// The array is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
//
// This array must be sorted.
static const std::array<std::pair<size_t, size_t>, 137>
    ImportantInstructionSuccessions{
        {{1, 1},   {1, 4},   {1, 5},   {1, 7},   {1, 8},   {1, 9},   {1, 11},
         {1, 12},  {1, 13},  {1, 14},  {1, 18},  {1, 20},  {1, 22},  {1, 24},
         {1, 25},  {1, 26},  {1, 27},  {1, 28},  {1, 29},  {1, 30},  {1, 31},
         {1, 32},  {1, 33},  {1, 34},  {1, 39},  {1, 40},  {1, 42},  {1, 45},
         {2, 1},   {2, 2},   {2, 13},  {2, 28},  {2, 29},  {2, 32},  {2, 33},
         {2, 34},  {2, 38},  {2, 48},  {2, 49},  {2, 53},  {2, 55},  {2, 56},
         {13, 2},  {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
         {28, 2},  {28, 48}, {28, 53}, {29, 2},  {29, 33}, {29, 56}, {31, 31},
         {31, 33}, {31, 34}, {31, 49}, {32, 1},  {32, 2},  {32, 13}, {32, 15},
         {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
         {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1},  {33, 2},  {33, 32},
         {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1},  {34, 2},
         {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
         {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2},  {48, 34}, {48, 56},
         {49, 1},  {49, 2},  {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
         {49, 49}, {49, 56}, {53, 1},  {53, 2},  {53, 28}, {53, 34}, {53, 53},
         {53, 57}, {55, 1},  {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
         {56, 1},  {56, 2},  {56, 7},  {56, 13}, {56, 32}, {56, 33}, {56, 34},
         {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
         {64, 1},  {64, 64}, {65, 1},  {65, 65}}};
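// Note: the numeric IDs above are opcode values as assigned by
// llvm/IR/Instruction.def for this LLVM version; a pair can be decoded for
// inspection with Instruction::getOpcodeName(ID) if the feature set is ever
// re-derived.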

// We have: 9 calculated features (the named features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter two, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always-0 feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
    IRToNativeSizeLearning::NumNamedFeatures;
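// The flattened feature vector therefore has the layout
// [NamedFeatures..., InstructionHistogram..., InstructionPairHistogram...],
// matching the order in which fillTensor() writes the buffer below.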

size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t Ret = 0;
  for (const auto &BB : F)
    for (const auto &I : BB)
      Ret += *(TTI.getInstructionCost(
                   &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize)
                   .getValue());
  return Ret;
}

size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  return getSize(F, TTI);
}
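// Note: the value computed by getSize() feeds the InitialSize named feature;
// it is a TTI code-size estimate of the IR, not a measured native size.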

unsigned getMaxDominatorTreeDepth(const Function &F,
                                  const DominatorTree &Tree) {
  unsigned Ret = 0;
  for (const auto &BB : F)
    if (const auto *TN = Tree.getNode(&BB))
      Ret = std::max(Ret, TN->getLevel());
  return Ret;
}
} // namespace

IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                            FunctionAnalysisManager &FAM) {
  assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
         "expected function features are sorted");

  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  size_t InstrCount = getMaxInstructionID() + 1;
  FF.InstructionHistogram.resize(InstrCount);

  FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());

  int StartID = 0;
  int LastID = StartID;
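  // Map an ordered (previous, current) opcode pair to its index in
  // ImportantInstructionSuccessions, or -1 when the pair is not a tracked
  // feature.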
  auto getPairIndex = [](size_t a, size_t b) {
    auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
    if (I == ImportantInstructionSuccessions.end())
      return -1;
    return static_cast<int>(
        std::distance(ImportantInstructionSuccessions.begin(), I));
  };

  // We don't want debug calls, because they'd just add noise.
  for (const auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      auto ID = I.getOpcode();

      ++FF.InstructionHistogram[ID];
      int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] =
      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
}

void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
  Ptr += NamedFeatures.size();
  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
  Ptr += InstructionHistogram.size();
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
            Ptr);
}

bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
}

InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested()) {
    return;
  }
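  // The tensor names below follow the default TF2 SavedModel serving
  // signature ("serving_default_*" input, "StatefulPartitionedCall" output),
  // which is what the ir2native saved model is expected to expose.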
  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
      "serving_default_input_1",
      {1, static_cast<int64_t>(
              IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
  std::vector<TensorSpec> OutputSpecs{
      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
  Evaluator = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Evaluator.reset();
    return;
  }
}

InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return None;
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
      const_cast<Function &>(F), FAM);
  int32_t *V = Evaluator->getInput<int32_t>(0);
  Features.fillTensor(V);
  auto ER = Evaluator->evaluate();
  if (!ER)
    return None;
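  // The model regresses native size as a float; clamp negative predictions to
  // zero before converting to the unsigned size estimate returned below.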
  float Ret = *ER->getTensorValue<float>(0);
  if (Ret < 0.0)
    Ret = 0.0;
  return static_cast<size_t>(Ret);
}

InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Other)
    : Evaluator(std::move(Other.Evaluator)) {}

InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}

#else
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return std::nullopt;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif

PreservedAnalyses
InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
     << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
  return PreservedAnalyses::all();
}
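
// Usage sketch (an assumption, not verified here): if the printer pass is
// registered as "print<inline-size-estimator>" in PassRegistry.def, the
// estimate can be printed with e.g.
//   opt -passes='print<inline-size-estimator>' \
//       -ml-inliner-ir2native-model=<path/to/saved_model> -disable-output in.ll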