LLVM  3.7.0
MachineTraceMetrics.h
Go to the documentation of this file.
1 //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interface for the MachineTraceMetrics analysis pass
11 // that estimates CPU resource usage and critical data dependency paths through
12 // preferred traces. This is useful for super-scalar CPUs where execution speed
13 // can be limited both by data dependencies and by limited execution resources.
14 //
15 // Out-of-order CPUs will often be executing instructions from multiple basic
16 // blocks at the same time. This makes it difficult to estimate the resource
17 // usage accurately in a single basic block. Resources can be estimated better
18 // by looking at a trace through the current basic block.
19 //
20 // For every block, the MachineTraceMetrics pass will pick a preferred trace
21 // that passes through the block. The trace is chosen based on loop structure,
22 // branch probabilities, and resource usage. The intention is to pick likely
23 // traces that would be the most affected by code transformations.
24 //
25 // It is expensive to compute a full arbitrary trace for every block, so to
26 // save some computations, traces are chosen to be convergent. This means that
27 // if the traces through basic blocks A and B ever cross when moving away from
28 // A and B, they never diverge again. This applies in both directions - If the
29 // traces meet above A and B, they won't diverge when going further back.
30 //
31 // Traces tend to align with loops. The trace through a block in an inner loop
32 // will begin at the loop entry block and end at a back edge. If there are
33 // nested loops, the trace may begin and end at those instead.
34 //
35 // For each trace, we compute the critical path length, which is the number of
36 // cycles required to execute the trace when execution is limited by data
37 // dependencies only. We also compute the resource height, which is the number
38 // of cycles required to execute all instructions in the trace when ignoring
39 // data dependencies.
40 //
41 // Every instruction in the current block has a slack - the number of cycles
42 // execution of the instruction can be delayed without extending the critical
43 // path.
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
48 #define LLVM_CODEGEN_MACHINETRACEMETRICS_H
49 
50 #include "llvm/ADT/ArrayRef.h"
51 #include "llvm/ADT/DenseMap.h"
54 
55 namespace llvm {
56 
57 class InstrItineraryData;
58 class MachineBasicBlock;
59 class MachineInstr;
60 class MachineLoop;
61 class MachineLoopInfo;
62 class MachineRegisterInfo;
63 class TargetInstrInfo;
64 class TargetRegisterInfo;
65 class raw_ostream;
66 
68  const MachineFunction *MF;
69  const TargetInstrInfo *TII;
70  const TargetRegisterInfo *TRI;
71  const MachineRegisterInfo *MRI;
72  const MachineLoopInfo *Loops;
73  TargetSchedModel SchedModel;
74 
75 public:
76  class Ensemble;
77  class Trace;
78  static char ID;
80  void getAnalysisUsage(AnalysisUsage&) const override;
81  bool runOnMachineFunction(MachineFunction&) override;
82  void releaseMemory() override;
83  void verifyAnalysis() const override;
84 
85  friend class Ensemble;
86  friend class Trace;
87 
88  /// Per-basic block information that doesn't depend on the trace through the
89  /// block.
90  struct FixedBlockInfo {
91  /// The number of non-trivial instructions in the block.
92  /// Doesn't count PHI and COPY instructions that are likely to be removed.
93  unsigned InstrCount;
94 
95  /// True when the block contains calls.
96  bool HasCalls;
97 
99 
100  /// Returns true when resource information for this block has been computed.
101  bool hasResources() const { return InstrCount != ~0u; }
102 
103  /// Invalidate resource information.
104  void invalidate() { InstrCount = ~0u; }
105  };
106 
107  /// Get the fixed resource information about MBB. Compute it on demand.
108  const FixedBlockInfo *getResources(const MachineBasicBlock*);
109 
110  /// Get the scaled number of cycles used per processor resource in MBB.
111  /// This is an array with SchedModel.getNumProcResourceKinds() entries.
112  /// The getResources() function above must have been called first.
113  ///
114  /// These numbers have already been scaled by SchedModel.getResourceFactor().
115  ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
116 
117  /// A virtual register or regunit required by a basic block or its trace
118  /// successors.
119  struct LiveInReg {
120  /// The virtual register required, or a register unit.
121  unsigned Reg;
122 
123  /// For virtual registers: Minimum height of the defining instruction.
124  /// For regunits: Height of the highest user in the trace.
125  unsigned Height;
126 
127  LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
128  };
129 
130  /// Per-basic block information that relates to a specific trace through the
131  /// block. Convergent traces means that only one of these is required per
132  /// block in a trace ensemble.
133  struct TraceBlockInfo {
134  /// Trace predecessor, or NULL for the first block in the trace.
135  /// Valid when hasValidDepth().
137 
138  /// Trace successor, or NULL for the last block in the trace.
139  /// Valid when hasValidHeight().
141 
142  /// The block number of the head of the trace. (When hasValidDepth()).
143  unsigned Head;
144 
145  /// The block number of the tail of the trace. (When hasValidHeight()).
146  unsigned Tail;
147 
148  /// Accumulated number of instructions in the trace above this block.
149  /// Does not include instructions in this block.
150  unsigned InstrDepth;
151 
152  /// Accumulated number of instructions in the trace below this block.
153  /// Includes instructions in this block.
154  unsigned InstrHeight;
155 
157  Pred(nullptr), Succ(nullptr),
158  InstrDepth(~0u), InstrHeight(~0u),
160 
161  /// Returns true if the depth resources have been computed from the trace
162  /// above this block.
163  bool hasValidDepth() const { return InstrDepth != ~0u; }
164 
165  /// Returns true if the height resources have been computed from the trace
166  /// below this block.
167  bool hasValidHeight() const { return InstrHeight != ~0u; }
168 
169  /// Invalidate depth resources when some block above this one has changed.
170  void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
171 
172  /// Invalidate height resources when a block below this one has changed.
174 
175  /// Assuming that this is a dominator of TBI, determine if it contains
176  /// useful instruction depths. A dominating block can be above the current
177  /// trace head, and any dependencies from such a far away dominator are not
178  /// expected to affect the critical path.
179  ///
180  /// Also returns true when TBI == this.
181  bool isUsefulDominator(const TraceBlockInfo &TBI) const {
182  // The trace for TBI may not even be calculated yet.
183  if (!hasValidDepth() || !TBI.hasValidDepth())
184  return false;
185  // Instruction depths are only comparable if the traces share a head.
186  if (Head != TBI.Head)
187  return false;
188  // It is almost always the case that TBI belongs to the same trace as
189  // this block, but rare convoluted cases involving irreducible control
190  // flow, a dominator may share a trace head without actually being on the
191  // same trace as TBI. This is not a big problem as long as it doesn't
192  // increase the instruction depth.
193  return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
194  }
195 
196  // Data-dependency-related information. Per-instruction depth and height
197  // are computed from data dependencies in the current trace, using
198  // itinerary data.
199 
200  /// Instruction depths have been computed. This implies hasValidDepth().
202 
203  /// Instruction heights have been computed. This implies hasValidHeight().
205 
206  /// Critical path length. This is the number of cycles in the longest data
207  /// dependency chain through the trace. This is only valid when both
208  /// HasValidInstrDepths and HasValidInstrHeights are set.
209  unsigned CriticalPath;
210 
211  /// Live-in registers. These registers are defined above the current block
212  /// and used by this block or a block below it.
213  /// This does not include PHI uses in the current block, but it does
214  /// include PHI uses in deeper blocks.
216 
217  void print(raw_ostream&) const;
218  };
219 
220  /// InstrCycles represents the cycle height and depth of an instruction in a
221  /// trace.
222  struct InstrCycles {
223  /// Earliest issue cycle as determined by data dependencies and instruction
224  /// latencies from the beginning of the trace. Data dependencies from
225  /// before the trace are not included.
226  unsigned Depth;
227 
228  /// Minimum number of cycles from this instruction is issued to the of the
229  /// trace, as determined by data dependencies and instruction latencies.
230  unsigned Height;
231  };
232 
233  /// A trace represents a plausible sequence of executed basic blocks that
234  /// passes through the current basic block one. The Trace class serves as a
235  /// handle to internal cached data structures.
236  class Trace {
237  Ensemble &TE;
238  TraceBlockInfo &TBI;
239 
240  unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
241 
242  public:
243  explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
244  void print(raw_ostream&) const;
245 
246  /// Compute the total number of instructions in the trace.
247  unsigned getInstrCount() const {
248  return TBI.InstrDepth + TBI.InstrHeight;
249  }
250 
251  /// Return the resource depth of the top/bottom of the trace center block.
252  /// This is the number of cycles required to execute all instructions from
253  /// the trace head to the trace center block. The resource depth only
254  /// considers execution resources, it ignores data dependencies.
255  /// When Bottom is set, instructions in the trace center block are included.
256  unsigned getResourceDepth(bool Bottom) const;
257 
258  /// Return the resource length of the trace. This is the number of cycles
259  /// required to execute the instructions in the trace if they were all
260  /// independent, exposing the maximum instruction-level parallelism.
261  ///
262  /// Any blocks in Extrablocks are included as if they were part of the
263  /// trace. Likewise, extra resources required by the specified scheduling
264  /// classes are included. For the caller to account for extra machine
265  /// instructions, it must first resolve each instruction's scheduling class.
266  unsigned getResourceLength(
269  ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
270 
271  /// Return the length of the (data dependency) critical path through the
272  /// trace.
273  unsigned getCriticalPath() const { return TBI.CriticalPath; }
274 
275  /// Return the depth and height of MI. The depth is only valid for
276  /// instructions in or above the trace center block. The height is only
277  /// valid for instructions in or below the trace center block.
279  return TE.Cycles.lookup(MI);
280  }
281 
282  /// Return the slack of MI. This is the number of cycles MI can be delayed
283  /// before the critical path becomes longer.
284  /// MI must be an instruction in the trace center block.
285  unsigned getInstrSlack(const MachineInstr *MI) const;
286 
287  /// Return the Depth of a PHI instruction in a trace center block successor.
288  /// The PHI does not have to be part of the trace.
289  unsigned getPHIDepth(const MachineInstr *PHI) const;
290 
291  /// A dependence is useful if the basic block of the defining instruction
292  /// is part of the trace of the user instruction. It is assumed that DefMI
293  /// dominates UseMI (see also isUsefulDominator).
294  bool isDepInTrace(const MachineInstr *DefMI,
295  const MachineInstr *UseMI) const;
296  };
297 
298  /// A trace ensemble is a collection of traces selected using the same
299  /// strategy, for example 'minimum resource height'. There is one trace for
300  /// every block in the function.
301  class Ensemble {
304  SmallVector<unsigned, 0> ProcResourceDepths;
305  SmallVector<unsigned, 0> ProcResourceHeights;
306  friend class Trace;
307 
308  void computeTrace(const MachineBasicBlock*);
309  void computeDepthResources(const MachineBasicBlock*);
310  void computeHeightResources(const MachineBasicBlock*);
311  unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
312  void computeInstrDepths(const MachineBasicBlock*);
313  void computeInstrHeights(const MachineBasicBlock*);
314  void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
316 
317  protected:
319  virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
320  virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
321  explicit Ensemble(MachineTraceMetrics*);
322  const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
325  ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
326  ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
327 
328  public:
329  virtual ~Ensemble();
330  virtual const char *getName() const =0;
331  void print(raw_ostream&) const;
332  void invalidate(const MachineBasicBlock *MBB);
333  void verify() const;
334 
335  /// Get the trace that passes through MBB.
336  /// The trace is computed on demand.
337  Trace getTrace(const MachineBasicBlock *MBB);
338  };
339 
340  /// Strategies for selecting traces.
341  enum Strategy {
342  /// Select the trace through a block that has the fewest instructions.
344 
346  };
347 
348  /// Get the trace ensemble representing the given trace selection strategy.
349  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
350  /// and valid for the lifetime of the analysis pass.
352 
353  /// Invalidate cached information about MBB. This must be called *before* MBB
354  /// is erased, or the CFG is otherwise changed.
355  ///
356  /// This invalidates per-block information about resource usage for MBB only,
357  /// and it invalidates per-trace information for any trace that passes
358  /// through MBB.
359  ///
360  /// Call Ensemble::getTrace() again to update any trace handles.
361  void invalidate(const MachineBasicBlock *MBB);
362 
363 private:
364  // One entry per basic block, indexed by block number.
366 
367  // Cycles consumed on each processor resource per block.
368  // The number of processor resource kinds is constant for a given subtarget,
369  // but it is not known at compile time. The number of cycles consumed by
370  // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
371  // where Kinds = SchedModel.getNumProcResourceKinds().
372  SmallVector<unsigned, 0> ProcResourceCycles;
373 
374  // One ensemble per strategy.
375  Ensemble* Ensembles[TS_NumStrategies];
376 
377  // Convert scaled resource usage to a cycle count that can be compared with
378  // latencies.
379  unsigned getCycles(unsigned Scaled) {
380  unsigned Factor = SchedModel.getLatencyFactor();
381  return (Scaled + Factor - 1) / Factor;
382  }
383 };
384 
386  const MachineTraceMetrics::Trace &Tr) {
387  Tr.print(OS);
388  return OS;
389 }
390 
392  const MachineTraceMetrics::Ensemble &En) {
393  En.print(OS);
394  return OS;
395 }
396 } // end namespace llvm
397 
398 #endif
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
bool isDepInTrace(const MachineInstr *DefMI, const MachineInstr *UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
bool HasCalls
True when the block contains calls.
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
Trace(Ensemble &te, TraceBlockInfo &tbi)
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
unsigned InstrCount
The number of non-trivial instructions in the block.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
Strategy
Strategies for selecting traces.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths...
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
#define false
Definition: ConvertUTF.c:65
LiveInReg(unsigned Reg, unsigned Height=0)
virtual const MachineBasicBlock * pickTraceSucc(const MachineBasicBlock *)=0
Select the trace through a block that has the fewest instructions.
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
unsigned Height
For virtual registers: Minimum height of the defining instruction.
TargetInstrInfo - Interface to description of machine instruction set.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
unsigned CriticalPath
Critical path length.
void invalidate()
Invalidate resource information.
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
Ensemble * getEnsemble(Strategy)
Get the trace ensemble representing the given trace selection strategy.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
Per-basic block information that doesn't depend on the trace through the block.
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=None, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=None, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=None) const
Return the resource length of the trace.
static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB)
Add live-in registers of basic block MBB to LiveRegs.
Represent the analysis usage information of a pass.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
virtual const char * getName() const =0
virtual const MachineBasicBlock * pickTracePred(const MachineBasicBlock *)=0
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
Per-basic block information that relates to a specific trace through the block.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
unsigned Reg
The virtual register required, or a register unit.
A virtual register or regunit required by a basic block or its trace successors.
bool hasResources() const
Returns true when resource information for this block has been computed.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
InstrCycles getInstrCycles(const MachineInstr *MI) const
Return the depth and height of MI.
Representation of each machine instruction.
Definition: MachineInstr.h:51
InstrCycles represents the cycle height and depth of an instruction in a trace.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block...
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:1738
unsigned getPHIDepth(const MachineInstr *PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
SmallVector< LiveInReg, 4 > LiveIns
Live-in registers.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:38
ArrayRef< unsigned > getProcResourceCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
unsigned getInstrSlack(const MachineInstr *MI) const
Return the slack of MI.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.