LLVM  4.0.0
MachineTraceMetrics.h
Go to the documentation of this file.
1 //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interface for the MachineTraceMetrics analysis pass
11 // that estimates CPU resource usage and critical data dependency paths through
12 // preferred traces. This is useful for super-scalar CPUs where execution speed
13 // can be limited both by data dependencies and by limited execution resources.
14 //
15 // Out-of-order CPUs will often be executing instructions from multiple basic
16 // blocks at the same time. This makes it difficult to estimate the resource
17 // usage accurately in a single basic block. Resources can be estimated better
18 // by looking at a trace through the current basic block.
19 //
20 // For every block, the MachineTraceMetrics pass will pick a preferred trace
21 // that passes through the block. The trace is chosen based on loop structure,
22 // branch probabilities, and resource usage. The intention is to pick likely
23 // traces that would be the most affected by code transformations.
24 //
25 // It is expensive to compute a full arbitrary trace for every block, so to
26 // save some computations, traces are chosen to be convergent. This means that
27 // if the traces through basic blocks A and B ever cross when moving away from
28 // A and B, they never diverge again. This applies in both directions - If the
29 // traces meet above A and B, they won't diverge when going further back.
30 //
31 // Traces tend to align with loops. The trace through a block in an inner loop
32 // will begin at the loop entry block and end at a back edge. If there are
33 // nested loops, the trace may begin and end at those instead.
34 //
35 // For each trace, we compute the critical path length, which is the number of
36 // cycles required to execute the trace when execution is limited by data
37 // dependencies only. We also compute the resource height, which is the number
38 // of cycles required to execute all instructions in the trace when ignoring
39 // data dependencies.
40 //
41 // Every instruction in the current block has a slack - the number of cycles
42 // execution of the instruction can be delayed without extending the critical
43 // path.
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
48 #define LLVM_CODEGEN_MACHINETRACEMETRICS_H
49 
50 #include "llvm/ADT/ArrayRef.h"
51 #include "llvm/ADT/DenseMap.h"
55 
56 namespace llvm {
57 
58 class InstrItineraryData;
59 class MachineBasicBlock;
60 class MachineInstr;
61 class MachineLoop;
62 class MachineLoopInfo;
63 class MachineRegisterInfo;
64 class TargetInstrInfo;
65 class TargetRegisterInfo;
66 class raw_ostream;
67 
69  const MachineFunction *MF;
70  const TargetInstrInfo *TII;
71  const TargetRegisterInfo *TRI;
72  const MachineRegisterInfo *MRI;
73  const MachineLoopInfo *Loops;
74  TargetSchedModel SchedModel;
75 
76 public:
77  class Ensemble;
78  class Trace;
79  static char ID;
81  void getAnalysisUsage(AnalysisUsage&) const override;
82  bool runOnMachineFunction(MachineFunction&) override;
83  void releaseMemory() override;
84  void verifyAnalysis() const override;
85 
86  friend class Ensemble;
87  friend class Trace;
88 
89  /// Per-basic block information that doesn't depend on the trace through the
90  /// block.
91  struct FixedBlockInfo {
92  /// The number of non-trivial instructions in the block.
93  /// Doesn't count PHI and COPY instructions that are likely to be removed.
94  unsigned InstrCount;
95 
96  /// True when the block contains calls.
97  bool HasCalls;
98 
100 
101  /// Returns true when resource information for this block has been computed.
102  bool hasResources() const { return InstrCount != ~0u; }
103 
104  /// Invalidate resource information.
105  void invalidate() { InstrCount = ~0u; }
106  };
107 
108  /// Get the fixed resource information about MBB. Compute it on demand.
109  const FixedBlockInfo *getResources(const MachineBasicBlock*);
110 
111  /// Get the scaled number of cycles used per processor resource in MBB.
112  /// This is an array with SchedModel.getNumProcResourceKinds() entries.
113  /// The getResources() function above must have been called first.
114  ///
115  /// These numbers have already been scaled by SchedModel.getResourceFactor().
116  ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
117 
118  /// A virtual register or regunit required by a basic block or its trace
119  /// successors.
120  struct LiveInReg {
121  /// The virtual register required, or a register unit.
122  unsigned Reg;
123 
124  /// For virtual registers: Minimum height of the defining instruction.
125  /// For regunits: Height of the highest user in the trace.
126  unsigned Height;
127 
128  LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
129  };
130 
131  /// Per-basic block information that relates to a specific trace through the
132  /// block. Convergent traces means that only one of these is required per
133  /// block in a trace ensemble.
134  struct TraceBlockInfo {
135  /// Trace predecessor, or NULL for the first block in the trace.
136  /// Valid when hasValidDepth().
138 
139  /// Trace successor, or NULL for the last block in the trace.
140  /// Valid when hasValidHeight().
142 
143  /// The block number of the head of the trace. (When hasValidDepth()).
144  unsigned Head;
145 
146  /// The block number of the tail of the trace. (When hasValidHeight()).
147  unsigned Tail;
148 
149  /// Accumulated number of instructions in the trace above this block.
150  /// Does not include instructions in this block.
151  unsigned InstrDepth;
152 
153  /// Accumulated number of instructions in the trace below this block.
154  /// Includes instructions in this block.
155  unsigned InstrHeight;
156 
158  Pred(nullptr), Succ(nullptr),
159  InstrDepth(~0u), InstrHeight(~0u),
161 
162  /// Returns true if the depth resources have been computed from the trace
163  /// above this block.
164  bool hasValidDepth() const { return InstrDepth != ~0u; }
165 
166  /// Returns true if the height resources have been computed from the trace
167  /// below this block.
168  bool hasValidHeight() const { return InstrHeight != ~0u; }
169 
170  /// Invalidate depth resources when some block above this one has changed.
171  void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
172 
173  /// Invalidate height resources when a block below this one has changed.
175 
176  /// Assuming that this is a dominator of TBI, determine if it contains
177  /// useful instruction depths. A dominating block can be above the current
178  /// trace head, and any dependencies from such a far away dominator are not
179  /// expected to affect the critical path.
180  ///
181  /// Also returns true when TBI == this.
182  bool isUsefulDominator(const TraceBlockInfo &TBI) const {
183  // The trace for TBI may not even be calculated yet.
184  if (!hasValidDepth() || !TBI.hasValidDepth())
185  return false;
186  // Instruction depths are only comparable if the traces share a head.
187  if (Head != TBI.Head)
188  return false;
189  // It is almost always the case that TBI belongs to the same trace as
190  // this block, but rare convoluted cases involving irreducible control
191  // flow, a dominator may share a trace head without actually being on the
192  // same trace as TBI. This is not a big problem as long as it doesn't
193  // increase the instruction depth.
194  return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
195  }
196 
197  // Data-dependency-related information. Per-instruction depth and height
198  // are computed from data dependencies in the current trace, using
199  // itinerary data.
200 
201  /// Instruction depths have been computed. This implies hasValidDepth().
203 
204  /// Instruction heights have been computed. This implies hasValidHeight().
206 
207  /// Critical path length. This is the number of cycles in the longest data
208  /// dependency chain through the trace. This is only valid when both
209  /// HasValidInstrDepths and HasValidInstrHeights are set.
210  unsigned CriticalPath;
211 
212  /// Live-in registers. These registers are defined above the current block
213  /// and used by this block or a block below it.
214  /// This does not include PHI uses in the current block, but it does
215  /// include PHI uses in deeper blocks.
217 
218  void print(raw_ostream&) const;
219  };
220 
221  /// InstrCycles represents the cycle height and depth of an instruction in a
222  /// trace.
223  struct InstrCycles {
224  /// Earliest issue cycle as determined by data dependencies and instruction
225  /// latencies from the beginning of the trace. Data dependencies from
226  /// before the trace are not included.
227  unsigned Depth;
228 
229  /// Minimum number of cycles from this instruction is issued to the of the
230  /// trace, as determined by data dependencies and instruction latencies.
231  unsigned Height;
232  };
233 
234  /// A trace represents a plausible sequence of executed basic blocks that
235  /// passes through the current basic block one. The Trace class serves as a
236  /// handle to internal cached data structures.
237  class Trace {
238  Ensemble &TE;
239  TraceBlockInfo &TBI;
240 
241  unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
242 
243  public:
244  explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
245  void print(raw_ostream&) const;
246 
247  /// Compute the total number of instructions in the trace.
248  unsigned getInstrCount() const {
249  return TBI.InstrDepth + TBI.InstrHeight;
250  }
251 
252  /// Return the resource depth of the top/bottom of the trace center block.
253  /// This is the number of cycles required to execute all instructions from
254  /// the trace head to the trace center block. The resource depth only
255  /// considers execution resources, it ignores data dependencies.
256  /// When Bottom is set, instructions in the trace center block are included.
257  unsigned getResourceDepth(bool Bottom) const;
258 
259  /// Return the resource length of the trace. This is the number of cycles
260  /// required to execute the instructions in the trace if they were all
261  /// independent, exposing the maximum instruction-level parallelism.
262  ///
263  /// Any blocks in Extrablocks are included as if they were part of the
264  /// trace. Likewise, extra resources required by the specified scheduling
265  /// classes are included. For the caller to account for extra machine
266  /// instructions, it must first resolve each instruction's scheduling class.
267  unsigned getResourceLength(
270  ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
271 
272  /// Return the length of the (data dependency) critical path through the
273  /// trace.
274  unsigned getCriticalPath() const { return TBI.CriticalPath; }
275 
276  /// Return the depth and height of MI. The depth is only valid for
277  /// instructions in or above the trace center block. The height is only
278  /// valid for instructions in or below the trace center block.
280  return TE.Cycles.lookup(&MI);
281  }
282 
283  /// Return the slack of MI. This is the number of cycles MI can be delayed
284  /// before the critical path becomes longer.
285  /// MI must be an instruction in the trace center block.
286  unsigned getInstrSlack(const MachineInstr &MI) const;
287 
288  /// Return the Depth of a PHI instruction in a trace center block successor.
289  /// The PHI does not have to be part of the trace.
290  unsigned getPHIDepth(const MachineInstr &PHI) const;
291 
292  /// A dependence is useful if the basic block of the defining instruction
293  /// is part of the trace of the user instruction. It is assumed that DefMI
294  /// dominates UseMI (see also isUsefulDominator).
295  bool isDepInTrace(const MachineInstr &DefMI,
296  const MachineInstr &UseMI) const;
297  };
298 
299  /// A trace ensemble is a collection of traces selected using the same
300  /// strategy, for example 'minimum resource height'. There is one trace for
301  /// every block in the function.
302  class Ensemble {
305  SmallVector<unsigned, 0> ProcResourceDepths;
306  SmallVector<unsigned, 0> ProcResourceHeights;
307  friend class Trace;
308 
309  void computeTrace(const MachineBasicBlock*);
310  void computeDepthResources(const MachineBasicBlock*);
311  void computeHeightResources(const MachineBasicBlock*);
312  unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
313  void computeInstrDepths(const MachineBasicBlock*);
314  void computeInstrHeights(const MachineBasicBlock*);
315  void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
317 
318  protected:
320  virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
321  virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
322  explicit Ensemble(MachineTraceMetrics*);
323  const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
326  ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
327  ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
328 
329  public:
330  virtual ~Ensemble();
331  virtual const char *getName() const =0;
332  void print(raw_ostream&) const;
333  void invalidate(const MachineBasicBlock *MBB);
334  void verify() const;
335 
336  /// Get the trace that passes through MBB.
337  /// The trace is computed on demand.
339  };
340 
341  /// Strategies for selecting traces.
342  enum Strategy {
343  /// Select the trace through a block that has the fewest instructions.
345 
347  };
348 
349  /// Get the trace ensemble representing the given trace selection strategy.
350  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
351  /// and valid for the lifetime of the analysis pass.
353 
354  /// Invalidate cached information about MBB. This must be called *before* MBB
355  /// is erased, or the CFG is otherwise changed.
356  ///
357  /// This invalidates per-block information about resource usage for MBB only,
358  /// and it invalidates per-trace information for any trace that passes
359  /// through MBB.
360  ///
361  /// Call Ensemble::getTrace() again to update any trace handles.
362  void invalidate(const MachineBasicBlock *MBB);
363 
364 private:
365  // One entry per basic block, indexed by block number.
367 
368  // Cycles consumed on each processor resource per block.
369  // The number of processor resource kinds is constant for a given subtarget,
370  // but it is not known at compile time. The number of cycles consumed by
371  // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
372  // where Kinds = SchedModel.getNumProcResourceKinds().
373  SmallVector<unsigned, 0> ProcResourceCycles;
374 
375  // One ensemble per strategy.
376  Ensemble* Ensembles[TS_NumStrategies];
377 
378  // Convert scaled resource usage to a cycle count that can be compared with
379  // latencies.
380  unsigned getCycles(unsigned Scaled) {
381  unsigned Factor = SchedModel.getLatencyFactor();
382  return (Scaled + Factor - 1) / Factor;
383  }
384 };
385 
387  const MachineTraceMetrics::Trace &Tr) {
388  Tr.print(OS);
389  return OS;
390 }
391 
393  const MachineTraceMetrics::Ensemble &En) {
394  En.print(OS);
395  return OS;
396 }
397 } // end namespace llvm
398 
399 #endif
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
bool HasCalls
True when the block contains calls.
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
Trace(Ensemble &te, TraceBlockInfo &tbi)
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
unsigned InstrCount
The number of non-trivial instructions in the block.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
Strategy
Strategies for selecting traces.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths...
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
LiveInReg(unsigned Reg, unsigned Height=0)
virtual const MachineBasicBlock * pickTraceSucc(const MachineBasicBlock *)=0
Select the trace through a block that has the fewest instructions.
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
MachineBasicBlock * MBB
Function Alias Analysis false
unsigned Height
For virtual registers: Minimum height of the defining instruction.
TargetInstrInfo - Interface to description of machine instruction set.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
unsigned CriticalPath
Critical path length.
void invalidate()
Invalidate resource information.
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
Ensemble * getEnsemble(Strategy)
Get the trace ensemble representing the given trace selection strategy.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
MachineInstrBuilder & UseMI
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
Per-basic block information that doesn't depend on the trace through the block.
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=None, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=None, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=None) const
Return the resource length of the trace.
Represent the analysis usage information of a pass.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
virtual const char * getName() const =0
virtual const MachineBasicBlock * pickTracePred(const MachineBasicBlock *)=0
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
Per-basic block information that relates to a specific trace through the block.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
unsigned Reg
The virtual register required, or a register unit.
A virtual register or regunit required by a basic block or its trace successors.
bool hasResources() const
Returns true when resource information for this block has been computed.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
Representation of each machine instruction.
Definition: MachineInstr.h:52
InstrCycles represents the cycle height and depth of an instruction in a trace.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block...
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:1726
SmallVector< LiveInReg, 4 > LiveIns
Live-in registers.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
IRTranslator LLVM IR MI
ArrayRef< unsigned > getProcResourceCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.