LCOV - code coverage report
Current view: top level - include/llvm/CodeGen - MachineTraceMetrics.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 20 21 95.2 %
Date: 2017-09-14 15:23:50 Functions: 3 5 60.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines the interface for the MachineTraceMetrics analysis pass
      11             : // that estimates CPU resource usage and critical data dependency paths through
      12             : // preferred traces. This is useful for super-scalar CPUs where execution speed
      13             : // can be limited both by data dependencies and by limited execution resources.
      14             : //
      15             : // Out-of-order CPUs will often be executing instructions from multiple basic
      16             : // blocks at the same time. This makes it difficult to estimate the resource
      17             : // usage accurately in a single basic block. Resources can be estimated better
      18             : // by looking at a trace through the current basic block.
      19             : //
      20             : // For every block, the MachineTraceMetrics pass will pick a preferred trace
      21             : // that passes through the block. The trace is chosen based on loop structure,
      22             : // branch probabilities, and resource usage. The intention is to pick likely
      23             : // traces that would be the most affected by code transformations.
      24             : //
      25             : // It is expensive to compute a full arbitrary trace for every block, so to
      26             : // save some computations, traces are chosen to be convergent. This means that
      27             : // if the traces through basic blocks A and B ever cross when moving away from
      28             : // A and B, they never diverge again. This applies in both directions - If the
      29             : // traces meet above A and B, they won't diverge when going further back.
      30             : //
      31             : // Traces tend to align with loops. The trace through a block in an inner loop
      32             : // will begin at the loop entry block and end at a back edge. If there are
      33             : // nested loops, the trace may begin and end at those instead.
      34             : //
      35             : // For each trace, we compute the critical path length, which is the number of
      36             : // cycles required to execute the trace when execution is limited by data
      37             : // dependencies only. We also compute the resource height, which is the number
      38             : // of cycles required to execute all instructions in the trace when ignoring
      39             : // data dependencies.
      40             : //
      41             : // Every instruction in the current block has a slack - the number of cycles
      42             : // execution of the instruction can be delayed without extending the critical
      43             : // path.
      44             : //
      45             : //===----------------------------------------------------------------------===//
      46             : 
      47             : #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
      48             : #define LLVM_CODEGEN_MACHINETRACEMETRICS_H
      49             : 
      50             : #include "llvm/ADT/SparseSet.h"
      51             : #include "llvm/ADT/ArrayRef.h"
      52             : #include "llvm/ADT/DenseMap.h"
      53             : #include "llvm/ADT/None.h"
      54             : #include "llvm/ADT/SmallVector.h"
      55             : #include "llvm/CodeGen/MachineBasicBlock.h"
      56             : #include "llvm/CodeGen/MachineFunctionPass.h"
      57             : #include "llvm/CodeGen/TargetSchedule.h"
      58             : 
      59             : namespace llvm {
      60             : 
      61             : class AnalysisUsage;
      62             : class MachineFunction;
      63             : class MachineInstr;
      64             : class MachineLoop;
      65             : class MachineLoopInfo;
      66             : class MachineRegisterInfo;
      67             : struct MCSchedClassDesc;
      68             : class raw_ostream;
      69             : class TargetInstrInfo;
      70             : class TargetRegisterInfo;
      71             : 
      72             : // Keep track of physreg data dependencies by recording each live register unit.
      73             : // Associate each regunit with an instruction operand. Depending on the
      74             : // direction instructions are scanned, it could be the operand that defined the
      75             : // regunit, or the highest operand to read the regunit.
      76             : struct LiveRegUnit {
      77             :   unsigned RegUnit;
      78             :   unsigned Cycle = 0;
      79             :   const MachineInstr *MI = nullptr;
      80             :   unsigned Op = 0;
      81             : 
      82             :   unsigned getSparseSetIndex() const { return RegUnit; }
      83             : 
      84        5428 :   LiveRegUnit(unsigned RU) : RegUnit(RU) {}
      85             : };
      86             : 
      87             : 
      88       49585 : class MachineTraceMetrics : public MachineFunctionPass {
      89             :   const MachineFunction *MF = nullptr;
      90             :   const TargetInstrInfo *TII = nullptr;
      91             :   const TargetRegisterInfo *TRI = nullptr;
      92             :   const MachineRegisterInfo *MRI = nullptr;
      93             :   const MachineLoopInfo *Loops = nullptr;
      94             :   TargetSchedModel SchedModel;
      95             : 
      96             : public:
      97             :   friend class Ensemble;
      98             :   friend class Trace;
      99             : 
     100             :   class Ensemble;
     101             : 
     102             :   static char ID;
     103             : 
     104             :   MachineTraceMetrics();
     105             : 
     106             :   void getAnalysisUsage(AnalysisUsage&) const override;
     107             :   bool runOnMachineFunction(MachineFunction&) override;
     108             :   void releaseMemory() override;
     109             :   void verifyAnalysis() const override;
     110             : 
     111             :   /// Per-basic block information that doesn't depend on the trace through the
     112             :   /// block.
     113             :   struct FixedBlockInfo {
     114             :     /// The number of non-trivial instructions in the block.
     115             :     /// Doesn't count PHI and COPY instructions that are likely to be removed.
     116             :     unsigned InstrCount = ~0u;
     117             : 
     118             :     /// True when the block contains calls.
     119             :     bool HasCalls = false;
     120             : 
     121      240513 :     FixedBlockInfo() = default;
     122             : 
     123             :     /// Returns true when resource information for this block has been computed.
     124             :     bool hasResources() const { return InstrCount != ~0u; }
     125             : 
     126             :     /// Invalidate resource information.
     127         387 :     void invalidate() { InstrCount = ~0u; }
     128             :   };
     129             : 
     130             :   /// Get the fixed resource information about MBB. Compute it on demand.
     131             :   const FixedBlockInfo *getResources(const MachineBasicBlock*);
     132             : 
     133             :   /// Get the scaled number of cycles used per processor resource in MBB.
     134             :   /// This is an array with SchedModel.getNumProcResourceKinds() entries.
     135             :   /// The getResources() function above must have been called first.
     136             :   ///
     137             :   /// These numbers have already been scaled by SchedModel.getResourceFactor().
     138             :   ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
     139             : 
     140             :   /// A virtual register or regunit required by a basic block or its trace
     141             :   /// successors.
     142             :   struct LiveInReg {
     143             :     /// The virtual register required, or a register unit.
     144             :     unsigned Reg;
     145             : 
     146             :     /// For virtual registers: Minimum height of the defining instruction.
     147             :     /// For regunits: Height of the highest user in the trace.
     148             :     unsigned Height;
     149             : 
     150        2872 :     LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
     151             :   };
     152             : 
     153             :   /// Per-basic block information that relates to a specific trace through the
     154             :   /// block. Convergent traces means that only one of these is required per
     155             :   /// block in a trace ensemble.
     156      435072 :   struct TraceBlockInfo {
     157             :     /// Trace predecessor, or NULL for the first block in the trace.
     158             :     /// Valid when hasValidDepth().
     159             :     const MachineBasicBlock *Pred = nullptr;
     160             : 
     161             :     /// Trace successor, or NULL for the last block in the trace.
     162             :     /// Valid when hasValidHeight().
     163             :     const MachineBasicBlock *Succ = nullptr;
     164             : 
     165             :     /// The block number of the head of the trace. (When hasValidDepth()).
     166             :     unsigned Head;
     167             : 
     168             :     /// The block number of the tail of the trace. (When hasValidHeight()).
     169             :     unsigned Tail;
     170             : 
     171             :     /// Accumulated number of instructions in the trace above this block.
     172             :     /// Does not include instructions in this block.
     173             :     unsigned InstrDepth = ~0u;
     174             : 
     175             :     /// Accumulated number of instructions in the trace below this block.
     176             :     /// Includes instructions in this block.
     177             :     unsigned InstrHeight = ~0u;
     178             : 
     179      435072 :     TraceBlockInfo() = default;
     180             : 
     181             :     /// Returns true if the depth resources have been computed from the trace
     182             :     /// above this block.
     183        1089 :     bool hasValidDepth() const { return InstrDepth != ~0u; }
     184             : 
     185             :     /// Returns true if the height resources have been computed from the trace
     186             :     /// below this block.
     187        1270 :     bool hasValidHeight() const { return InstrHeight != ~0u; }
     188             : 
     189             :     /// Invalidate depth resources when some block above this one has changed.
     190         405 :     void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
     191             : 
     192             :     /// Invalidate height resources when a block below this one has changed.
     193         405 :     void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
     194             : 
     195             :     /// Assuming that this is a dominator of TBI, determine if it contains
     196             :     /// useful instruction depths. A dominating block can be above the current
     197             :     /// trace head, and any dependencies from such a far away dominator are not
     198             :     /// expected to affect the critical path.
     199             :     ///
     200             :     /// Also returns true when TBI == this.
     201             :     bool isUsefulDominator(const TraceBlockInfo &TBI) const {
     202             :       // The trace for TBI may not even be calculated yet.
     203       12312 :       if (!hasValidDepth() || !TBI.hasValidDepth())
     204             :         return false;
     205             :       // Instruction depths are only comparable if the traces share a head.
     206       12269 :       if (Head != TBI.Head)
     207             :         return false;
     208             :       // It is almost always the case that TBI belongs to the same trace as
     209             :       // this block, but rare convoluted cases involving irreducible control
     210             :       // flow, a dominator may share a trace head without actually being on the
     211             :       // same trace as TBI. This is not a big problem as long as it doesn't
     212             :       // increase the instruction depth.
     213       12269 :       return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
     214             :     }
     215             : 
     216             :     // Data-dependency-related information. Per-instruction depth and height
     217             :     // are computed from data dependencies in the current trace, using
     218             :     // itinerary data.
     219             : 
     220             :     /// Instruction depths have been computed. This implies hasValidDepth().
     221             :     bool HasValidInstrDepths = false;
     222             : 
     223             :     /// Instruction heights have been computed. This implies hasValidHeight().
     224             :     bool HasValidInstrHeights = false;
     225             : 
     226             :     /// Critical path length. This is the number of cycles in the longest data
     227             :     /// dependency chain through the trace. This is only valid when both
     228             :     /// HasValidInstrDepths and HasValidInstrHeights are set.
     229             :     unsigned CriticalPath;
     230             : 
     231             :     /// Live-in registers. These registers are defined above the current block
     232             :     /// and used by this block or a block below it.
     233             :     /// This does not include PHI uses in the current block, but it does
     234             :     /// include PHI uses in deeper blocks.
     235             :     SmallVector<LiveInReg, 4> LiveIns;
     236             : 
     237             :     void print(raw_ostream&) const;
     238             :   };
     239             : 
     240             :   /// InstrCycles represents the cycle height and depth of an instruction in a
     241             :   /// trace.
     242             :   struct InstrCycles {
     243             :     /// Earliest issue cycle as determined by data dependencies and instruction
     244             :     /// latencies from the beginning of the trace. Data dependencies from
     245             :     /// before the trace are not included.
     246             :     unsigned Depth;
     247             : 
     248             :     /// Minimum number of cycles from this instruction is issued to the of the
     249             :     /// trace, as determined by data dependencies and instruction latencies.
     250             :     unsigned Height;
     251             :   };
     252             : 
     253             :   /// A trace represents a plausible sequence of executed basic blocks that
     254             :   /// passes through the current basic block one. The Trace class serves as a
     255             :   /// handle to internal cached data structures.
     256             :   class Trace {
     257             :     Ensemble &TE;
     258             :     TraceBlockInfo &TBI;
     259             : 
     260        2860 :     unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
     261             : 
     262             :   public:
     263             :     explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
     264             : 
     265             :     void print(raw_ostream&) const;
     266             : 
     267             :     /// Compute the total number of instructions in the trace.
     268             :     unsigned getInstrCount() const {
     269           0 :       return TBI.InstrDepth + TBI.InstrHeight;
     270             :     }
     271             : 
     272             :     /// Return the resource depth of the top/bottom of the trace center block.
     273             :     /// This is the number of cycles required to execute all instructions from
     274             :     /// the trace head to the trace center block. The resource depth only
     275             :     /// considers execution resources, it ignores data dependencies.
     276             :     /// When Bottom is set, instructions in the trace center block are included.
     277             :     unsigned getResourceDepth(bool Bottom) const;
     278             : 
     279             :     /// Return the resource length of the trace. This is the number of cycles
     280             :     /// required to execute the instructions in the trace if they were all
     281             :     /// independent, exposing the maximum instruction-level parallelism.
     282             :     ///
     283             :     /// Any blocks in Extrablocks are included as if they were part of the
     284             :     /// trace. Likewise, extra resources required by the specified scheduling
     285             :     /// classes are included. For the caller to account for extra machine
     286             :     /// instructions, it must first resolve each instruction's scheduling class.
     287             :     unsigned getResourceLength(
     288             :         ArrayRef<const MachineBasicBlock *> Extrablocks = None,
     289             :         ArrayRef<const MCSchedClassDesc *> ExtraInstrs = None,
     290             :         ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
     291             : 
     292             :     /// Return the length of the (data dependency) critical path through the
     293             :     /// trace.
     294         167 :     unsigned getCriticalPath() const { return TBI.CriticalPath; }
     295             : 
     296             :     /// Return the depth and height of MI. The depth is only valid for
     297             :     /// instructions in or above the trace center block. The height is only
     298             :     /// valid for instructions in or below the trace center block.
     299             :     InstrCycles getInstrCycles(const MachineInstr &MI) const {
     300        5718 :       return TE.Cycles.lookup(&MI);
     301             :     }
     302             : 
     303             :     /// Return the slack of MI. This is the number of cycles MI can be delayed
     304             :     /// before the critical path becomes longer.
     305             :     /// MI must be an instruction in the trace center block.
     306             :     unsigned getInstrSlack(const MachineInstr &MI) const;
     307             : 
     308             :     /// Return the Depth of a PHI instruction in a trace center block successor.
     309             :     /// The PHI does not have to be part of the trace.
     310             :     unsigned getPHIDepth(const MachineInstr &PHI) const;
     311             : 
     312             :     /// A dependence is useful if the basic block of the defining instruction
     313             :     /// is part of the trace of the user instruction. It is assumed that DefMI
     314             :     /// dominates UseMI (see also isUsefulDominator).
     315             :     bool isDepInTrace(const MachineInstr &DefMI,
     316             :                       const MachineInstr &UseMI) const;
     317             :   };
     318             : 
     319             :   /// A trace ensemble is a collection of traces selected using the same
     320             :   /// strategy, for example 'minimum resource height'. There is one trace for
     321             :   /// every block in the function.
     322      433590 :   class Ensemble {
     323             :     friend class Trace;
     324             : 
     325             :     SmallVector<TraceBlockInfo, 4> BlockInfo;
     326             :     DenseMap<const MachineInstr*, InstrCycles> Cycles;
     327             :     SmallVector<unsigned, 0> ProcResourceDepths;
     328             :     SmallVector<unsigned, 0> ProcResourceHeights;
     329             : 
     330             :     void computeTrace(const MachineBasicBlock*);
     331             :     void computeDepthResources(const MachineBasicBlock*);
     332             :     void computeHeightResources(const MachineBasicBlock*);
     333             :     unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
     334             :     void computeInstrDepths(const MachineBasicBlock*);
     335             :     void computeInstrHeights(const MachineBasicBlock*);
     336             :     void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
     337             :                     ArrayRef<const MachineBasicBlock*> Trace);
     338             : 
     339             :   protected:
     340             :     MachineTraceMetrics &MTM;
     341             : 
     342             :     explicit Ensemble(MachineTraceMetrics*);
     343             : 
     344             :     virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
     345             :     virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
     346             :     const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
     347             :     const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
     348             :     const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
     349             :     ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
     350             :     ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
     351             : 
     352             :   public:
     353             :     virtual ~Ensemble();
     354             : 
     355             :     virtual const char *getName() const = 0;
     356             :     void print(raw_ostream&) const;
     357             :     void invalidate(const MachineBasicBlock *MBB);
     358             :     void verify() const;
     359             : 
     360             :     /// Get the trace that passes through MBB.
     361             :     /// The trace is computed on demand.
     362             :     Trace getTrace(const MachineBasicBlock *MBB);
     363             : 
     364             :     /// Updates the depth of an machine instruction, given RegUnits.
     365             :     void updateDepth(TraceBlockInfo &TBI, const MachineInstr&,
     366             :                      SparseSet<LiveRegUnit> &RegUnits);
     367             :     void updateDepth(const MachineBasicBlock *, const MachineInstr&,
     368             :                      SparseSet<LiveRegUnit> &RegUnits);
     369             : 
     370             :     /// Updates the depth of the instructions from Start to End.
     371             :     void updateDepths(MachineBasicBlock::iterator Start,
     372             :                       MachineBasicBlock::iterator End,
     373             :                       SparseSet<LiveRegUnit> &RegUnits);
     374             : 
     375             :   };
     376             : 
     377             :   /// Strategies for selecting traces.
     378             :   enum Strategy {
     379             :     /// Select the trace through a block that has the fewest instructions.
     380             :     TS_MinInstrCount,
     381             : 
     382             :     TS_NumStrategies
     383             :   };
     384             : 
     385             :   /// Get the trace ensemble representing the given trace selection strategy.
     386             :   /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
     387             :   /// and valid for the lifetime of the analysis pass.
     388             :   Ensemble *getEnsemble(Strategy);
     389             : 
     390             :   /// Invalidate cached information about MBB. This must be called *before* MBB
     391             :   /// is erased, or the CFG is otherwise changed.
     392             :   ///
     393             :   /// This invalidates per-block information about resource usage for MBB only,
     394             :   /// and it invalidates per-trace information for any trace that passes
     395             :   /// through MBB.
     396             :   ///
     397             :   /// Call Ensemble::getTrace() again to update any trace handles.
     398             :   void invalidate(const MachineBasicBlock *MBB);
     399             : 
     400             : private:
     401             :   // One entry per basic block, indexed by block number.
     402             :   SmallVector<FixedBlockInfo, 4> BlockInfo;
     403             : 
     404             :   // Cycles consumed on each processor resource per block.
     405             :   // The number of processor resource kinds is constant for a given subtarget,
     406             :   // but it is not known at compile time. The number of cycles consumed by
     407             :   // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
     408             :   // where Kinds = SchedModel.getNumProcResourceKinds().
     409             :   SmallVector<unsigned, 0> ProcResourceCycles;
     410             : 
     411             :   // One ensemble per strategy.
     412             :   Ensemble* Ensembles[TS_NumStrategies];
     413             : 
     414             :   // Convert scaled resource usage to a cycle count that can be compared with
     415             :   // latencies.
     416             :   unsigned getCycles(unsigned Scaled) {
     417         656 :     unsigned Factor = SchedModel.getLatencyFactor();
     418         656 :     return (Scaled + Factor - 1) / Factor;
     419             :   }
     420             : };
     421             : 
     422             : inline raw_ostream &operator<<(raw_ostream &OS,
     423             :                                const MachineTraceMetrics::Trace &Tr) {
     424             :   Tr.print(OS);
     425             :   return OS;
     426             : }
     427             : 
     428             : inline raw_ostream &operator<<(raw_ostream &OS,
     429             :                                const MachineTraceMetrics::Ensemble &En) {
     430             :   En.print(OS);
     431             :   return OS;
     432             : }
     433             : 
     434             : } // end namespace llvm
     435             : 
     436             : #endif // LLVM_CODEGEN_MACHINETRACEMETRICS_H

Generated by: LCOV version 1.13