LLVM  10.0.0svn
MachineTraceMetrics.h
Go to the documentation of this file.
1 //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interface for the MachineTraceMetrics analysis pass
10 // that estimates CPU resource usage and critical data dependency paths through
11 // preferred traces. This is useful for super-scalar CPUs where execution speed
12 // can be limited both by data dependencies and by limited execution resources.
13 //
14 // Out-of-order CPUs will often be executing instructions from multiple basic
15 // blocks at the same time. This makes it difficult to estimate the resource
16 // usage accurately in a single basic block. Resources can be estimated better
17 // by looking at a trace through the current basic block.
18 //
19 // For every block, the MachineTraceMetrics pass will pick a preferred trace
20 // that passes through the block. The trace is chosen based on loop structure,
21 // branch probabilities, and resource usage. The intention is to pick likely
22 // traces that would be the most affected by code transformations.
23 //
24 // It is expensive to compute a full arbitrary trace for every block, so to
25 // save some computations, traces are chosen to be convergent. This means that
26 // if the traces through basic blocks A and B ever cross when moving away from
27 // A and B, they never diverge again. This applies in both directions - If the
28 // traces meet above A and B, they won't diverge when going further back.
29 //
30 // Traces tend to align with loops. The trace through a block in an inner loop
31 // will begin at the loop entry block and end at a back edge. If there are
32 // nested loops, the trace may begin and end at those instead.
33 //
34 // For each trace, we compute the critical path length, which is the number of
35 // cycles required to execute the trace when execution is limited by data
36 // dependencies only. We also compute the resource height, which is the number
37 // of cycles required to execute all instructions in the trace when ignoring
38 // data dependencies.
39 //
40 // Every instruction in the current block has a slack - the number of cycles
41 // execution of the instruction can be delayed without extending the critical
42 // path.
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
47 #define LLVM_CODEGEN_MACHINETRACEMETRICS_H
48 
49 #include "llvm/ADT/SparseSet.h"
50 #include "llvm/ADT/ArrayRef.h"
51 #include "llvm/ADT/DenseMap.h"
52 #include "llvm/ADT/None.h"
53 #include "llvm/ADT/SmallVector.h"
57 
58 namespace llvm {
59 
60 class AnalysisUsage;
61 class MachineFunction;
62 class MachineInstr;
63 class MachineLoop;
64 class MachineLoopInfo;
65 class MachineRegisterInfo;
66 struct MCSchedClassDesc;
67 class raw_ostream;
68 class TargetInstrInfo;
69 class TargetRegisterInfo;
70 
71 // Keep track of physreg data dependencies by recording each live register unit.
72 // Associate each regunit with an instruction operand. Depending on the
73 // direction instructions are scanned, it could be the operand that defined the
74 // regunit, or the highest operand to read the regunit.
75 struct LiveRegUnit {
76  unsigned RegUnit;
77  unsigned Cycle = 0;
78  const MachineInstr *MI = nullptr;
79  unsigned Op = 0;
80 
81  unsigned getSparseSetIndex() const { return RegUnit; }
82 
83  LiveRegUnit(unsigned RU) : RegUnit(RU) {}
84 };
85 
86 
88  const MachineFunction *MF = nullptr;
89  const TargetInstrInfo *TII = nullptr;
90  const TargetRegisterInfo *TRI = nullptr;
91  const MachineRegisterInfo *MRI = nullptr;
92  const MachineLoopInfo *Loops = nullptr;
93  TargetSchedModel SchedModel;
94 
95 public:
96  friend class Ensemble;
97  friend class Trace;
98 
99  class Ensemble;
100 
101  static char ID;
102 
104 
105  void getAnalysisUsage(AnalysisUsage&) const override;
106  bool runOnMachineFunction(MachineFunction&) override;
107  void releaseMemory() override;
108  void verifyAnalysis() const override;
109 
110  /// Per-basic block information that doesn't depend on the trace through the
111  /// block.
112  struct FixedBlockInfo {
113  /// The number of non-trivial instructions in the block.
114  /// Doesn't count PHI and COPY instructions that are likely to be removed.
115  unsigned InstrCount = ~0u;
116 
117  /// True when the block contains calls.
118  bool HasCalls = false;
119 
120  FixedBlockInfo() = default;
121 
122  /// Returns true when resource information for this block has been computed.
123  bool hasResources() const { return InstrCount != ~0u; }
124 
125  /// Invalidate resource information.
126  void invalidate() { InstrCount = ~0u; }
127  };
128 
129  /// Get the fixed resource information about MBB. Compute it on demand.
130  const FixedBlockInfo *getResources(const MachineBasicBlock*);
131 
132  /// Get the scaled number of cycles used per processor resource in MBB.
133  /// This is an array with SchedModel.getNumProcResourceKinds() entries.
134  /// The getResources() function above must have been called first.
135  ///
136  /// These numbers have already been scaled by SchedModel.getResourceFactor().
137  ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
138 
139  /// A virtual register or regunit required by a basic block or its trace
140  /// successors.
141  struct LiveInReg {
142  /// The virtual register required, or a register unit.
143  unsigned Reg;
144 
145  /// For virtual registers: Minimum height of the defining instruction.
146  /// For regunits: Height of the highest user in the trace.
147  unsigned Height;
148 
149  LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
150  };
151 
152  /// Per-basic block information that relates to a specific trace through the
153  /// block. Convergent traces means that only one of these is required per
154  /// block in a trace ensemble.
155  struct TraceBlockInfo {
156  /// Trace predecessor, or NULL for the first block in the trace.
157  /// Valid when hasValidDepth().
158  const MachineBasicBlock *Pred = nullptr;
159 
160  /// Trace successor, or NULL for the last block in the trace.
161  /// Valid when hasValidHeight().
162  const MachineBasicBlock *Succ = nullptr;
163 
164  /// The block number of the head of the trace. (When hasValidDepth()).
165  unsigned Head;
166 
167  /// The block number of the tail of the trace. (When hasValidHeight()).
168  unsigned Tail;
169 
170  /// Accumulated number of instructions in the trace above this block.
171  /// Does not include instructions in this block.
172  unsigned InstrDepth = ~0u;
173 
174  /// Accumulated number of instructions in the trace below this block.
175  /// Includes instructions in this block.
176  unsigned InstrHeight = ~0u;
177 
178  TraceBlockInfo() = default;
179 
180  /// Returns true if the depth resources have been computed from the trace
181  /// above this block.
182  bool hasValidDepth() const { return InstrDepth != ~0u; }
183 
184  /// Returns true if the height resources have been computed from the trace
185  /// below this block.
186  bool hasValidHeight() const { return InstrHeight != ~0u; }
187 
188  /// Invalidate depth resources when some block above this one has changed.
189  void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
190 
191  /// Invalidate height resources when a block below this one has changed.
192  void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
193 
194  /// Assuming that this is a dominator of TBI, determine if it contains
195  /// useful instruction depths. A dominating block can be above the current
196  /// trace head, and any dependencies from such a far away dominator are not
197  /// expected to affect the critical path.
198  ///
199  /// Also returns true when TBI == this.
200  bool isUsefulDominator(const TraceBlockInfo &TBI) const {
201  // The trace for TBI may not even be calculated yet.
202  if (!hasValidDepth() || !TBI.hasValidDepth())
203  return false;
204  // Instruction depths are only comparable if the traces share a head.
205  if (Head != TBI.Head)
206  return false;
207  // It is almost always the case that TBI belongs to the same trace as
208  // this block, but rare convoluted cases involving irreducible control
209  // flow, a dominator may share a trace head without actually being on the
210  // same trace as TBI. This is not a big problem as long as it doesn't
211  // increase the instruction depth.
212  return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
213  }
214 
215  // Data-dependency-related information. Per-instruction depth and height
216  // are computed from data dependencies in the current trace, using
217  // itinerary data.
218 
219  /// Instruction depths have been computed. This implies hasValidDepth().
220  bool HasValidInstrDepths = false;
221 
222  /// Instruction heights have been computed. This implies hasValidHeight().
223  bool HasValidInstrHeights = false;
224 
225  /// Critical path length. This is the number of cycles in the longest data
226  /// dependency chain through the trace. This is only valid when both
227  /// HasValidInstrDepths and HasValidInstrHeights are set.
228  unsigned CriticalPath;
229 
230  /// Live-in registers. These registers are defined above the current block
231  /// and used by this block or a block below it.
232  /// This does not include PHI uses in the current block, but it does
233  /// include PHI uses in deeper blocks.
235 
236  void print(raw_ostream&) const;
237  };
238 
239  /// InstrCycles represents the cycle height and depth of an instruction in a
240  /// trace.
241  struct InstrCycles {
242  /// Earliest issue cycle as determined by data dependencies and instruction
243  /// latencies from the beginning of the trace. Data dependencies from
244  /// before the trace are not included.
245  unsigned Depth;
246 
247  /// Minimum number of cycles from this instruction is issued to the of the
248  /// trace, as determined by data dependencies and instruction latencies.
249  unsigned Height;
250  };
251 
252  /// A trace represents a plausible sequence of executed basic blocks that
253  /// passes through the current basic block one. The Trace class serves as a
254  /// handle to internal cached data structures.
255  class Trace {
256  Ensemble &TE;
257  TraceBlockInfo &TBI;
258 
259  unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
260 
261  public:
262  explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
263 
264  void print(raw_ostream&) const;
265 
266  /// Compute the total number of instructions in the trace.
267  unsigned getInstrCount() const {
268  return TBI.InstrDepth + TBI.InstrHeight;
269  }
270 
271  /// Return the resource depth of the top/bottom of the trace center block.
272  /// This is the number of cycles required to execute all instructions from
273  /// the trace head to the trace center block. The resource depth only
274  /// considers execution resources, it ignores data dependencies.
275  /// When Bottom is set, instructions in the trace center block are included.
276  unsigned getResourceDepth(bool Bottom) const;
277 
278  /// Return the resource length of the trace. This is the number of cycles
279  /// required to execute the instructions in the trace if they were all
280  /// independent, exposing the maximum instruction-level parallelism.
281  ///
282  /// Any blocks in Extrablocks are included as if they were part of the
283  /// trace. Likewise, extra resources required by the specified scheduling
284  /// classes are included. For the caller to account for extra machine
285  /// instructions, it must first resolve each instruction's scheduling class.
286  unsigned getResourceLength(
289  ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
290 
291  /// Return the length of the (data dependency) critical path through the
292  /// trace.
293  unsigned getCriticalPath() const { return TBI.CriticalPath; }
294 
295  /// Return the depth and height of MI. The depth is only valid for
296  /// instructions in or above the trace center block. The height is only
297  /// valid for instructions in or below the trace center block.
299  return TE.Cycles.lookup(&MI);
300  }
301 
302  /// Return the slack of MI. This is the number of cycles MI can be delayed
303  /// before the critical path becomes longer.
304  /// MI must be an instruction in the trace center block.
305  unsigned getInstrSlack(const MachineInstr &MI) const;
306 
307  /// Return the Depth of a PHI instruction in a trace center block successor.
308  /// The PHI does not have to be part of the trace.
309  unsigned getPHIDepth(const MachineInstr &PHI) const;
310 
311  /// A dependence is useful if the basic block of the defining instruction
312  /// is part of the trace of the user instruction. It is assumed that DefMI
313  /// dominates UseMI (see also isUsefulDominator).
314  bool isDepInTrace(const MachineInstr &DefMI,
315  const MachineInstr &UseMI) const;
316  };
317 
318  /// A trace ensemble is a collection of traces selected using the same
319  /// strategy, for example 'minimum resource height'. There is one trace for
320  /// every block in the function.
321  class Ensemble {
322  friend class Trace;
323 
326  SmallVector<unsigned, 0> ProcResourceDepths;
327  SmallVector<unsigned, 0> ProcResourceHeights;
328 
329  void computeTrace(const MachineBasicBlock*);
330  void computeDepthResources(const MachineBasicBlock*);
331  void computeHeightResources(const MachineBasicBlock*);
332  unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
333  void computeInstrDepths(const MachineBasicBlock*);
334  void computeInstrHeights(const MachineBasicBlock*);
335  void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
337 
338  protected:
340 
341  explicit Ensemble(MachineTraceMetrics*);
342 
343  virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
344  virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
345  const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
346  const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
347  const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
348  ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
349  ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
350 
351  public:
352  virtual ~Ensemble();
353 
354  virtual const char *getName() const = 0;
355  void print(raw_ostream&) const;
356  void invalidate(const MachineBasicBlock *MBB);
357  void verify() const;
358 
359  /// Get the trace that passes through MBB.
360  /// The trace is computed on demand.
361  Trace getTrace(const MachineBasicBlock *MBB);
362 
363  /// Updates the depth of an machine instruction, given RegUnits.
364  void updateDepth(TraceBlockInfo &TBI, const MachineInstr&,
365  SparseSet<LiveRegUnit> &RegUnits);
366  void updateDepth(const MachineBasicBlock *, const MachineInstr&,
367  SparseSet<LiveRegUnit> &RegUnits);
368 
369  /// Updates the depth of the instructions from Start to End.
370  void updateDepths(MachineBasicBlock::iterator Start,
372  SparseSet<LiveRegUnit> &RegUnits);
373 
374  };
375 
376  /// Strategies for selecting traces.
377  enum Strategy {
378  /// Select the trace through a block that has the fewest instructions.
380 
381  TS_NumStrategies
382  };
383 
384  /// Get the trace ensemble representing the given trace selection strategy.
385  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
386  /// and valid for the lifetime of the analysis pass.
387  Ensemble *getEnsemble(Strategy);
388 
389  /// Invalidate cached information about MBB. This must be called *before* MBB
390  /// is erased, or the CFG is otherwise changed.
391  ///
392  /// This invalidates per-block information about resource usage for MBB only,
393  /// and it invalidates per-trace information for any trace that passes
394  /// through MBB.
395  ///
396  /// Call Ensemble::getTrace() again to update any trace handles.
397  void invalidate(const MachineBasicBlock *MBB);
398 
399 private:
400  // One entry per basic block, indexed by block number.
402 
403  // Cycles consumed on each processor resource per block.
404  // The number of processor resource kinds is constant for a given subtarget,
405  // but it is not known at compile time. The number of cycles consumed by
406  // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
407  // where Kinds = SchedModel.getNumProcResourceKinds().
408  SmallVector<unsigned, 0> ProcResourceCycles;
409 
410  // One ensemble per strategy.
411  Ensemble* Ensembles[TS_NumStrategies];
412 
413  // Convert scaled resource usage to a cycle count that can be compared with
414  // latencies.
415  unsigned getCycles(unsigned Scaled) {
416  unsigned Factor = SchedModel.getLatencyFactor();
417  return (Scaled + Factor - 1) / Factor;
418  }
419 };
420 
422  const MachineTraceMetrics::Trace &Tr) {
423  Tr.print(OS);
424  return OS;
425 }
426 
428  const MachineTraceMetrics::Ensemble &En) {
429  En.print(OS);
430  return OS;
431 }
432 
433 } // end namespace llvm
434 
435 #endif // LLVM_CODEGEN_MACHINETRACEMETRICS_H
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned const TargetRegisterInfo * TRI
A trace ensemble is a collection of traces selected using the same strategy, for example &#39;minimum res...
static unsigned InstrCount
Trace(Ensemble &te, TraceBlockInfo &tbi)
Hexagon Hardware Loops
bool hasResources() const
Returns true when resource information for this block has been computed.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
Strategy
Strategies for selecting traces.
const MachineInstr * MI
static StringRef getName(Value *V)
LiveInReg(unsigned Reg, unsigned Height=0)
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block...
Select the trace through a block that has the fewest instructions.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs)
Adds registers contained in LiveRegs to the block live-in list of MBB.
unsigned Height
For virtual registers: Minimum height of the defining instruction.
LiveRegUnit(unsigned RU)
TargetInstrInfo - Interface to description of machine instruction set.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
unsigned CriticalPath
Critical path length.
unsigned const MachineRegisterInfo * MRI
void invalidate()
Invalidate resource information.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
MachineInstrBuilder & UseMI
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Per-basic block information that doesn&#39;t depend on the trace through the block.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
Represent the analysis usage information of a pass.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
MachineInstrBuilder MachineInstrBuilder & DefMI
Per-basic block information that relates to a specific trace through the block.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
unsigned Reg
The virtual register required, or a register unit.
A virtual register or regunit required by a basic block or its trace successors.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:63
SparseSet - Fast set implmentation for objects that can be identified by small unsigned keys...
Definition: SparseSet.h:123
InstrCycles represents the cycle height and depth of an instruction in a trace.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2047
SmallVector< LiveInReg, 4 > LiveIns
Live-in registers.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths...
unsigned getSparseSetIndex() const