LCOV - code coverage report
Current view: top level - include/llvm/MC - MCSchedule.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 9 9 100.0 %
Date: 2018-07-13 00:08:38 Functions: 0 0 -
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- llvm/MC/MCSchedule.h - Scheduling -----------------------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines the classes used to describe a subtarget's machine model
      11             : // for scheduling and other instruction cost heuristics.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #ifndef LLVM_MC_MCSCHEDULE_H
      16             : #define LLVM_MC_MCSCHEDULE_H
      17             : 
      18             : #include "llvm/ADT/Optional.h"
      19             : #include "llvm/Config/llvm-config.h"
      20             : #include "llvm/Support/DataTypes.h"
      21             : #include <cassert>
      22             : 
      23             : namespace llvm {
      24             : 
      25             : struct InstrItinerary;
      26             : class MCSubtargetInfo;
      27             : class MCInstrInfo;
      28             : class MCInst;
      29             : class InstrItineraryData;
      30             : 
      31             : /// Define a kind of processor resource that will be modeled by the scheduler.
      32             : struct MCProcResourceDesc {
      33             :   const char *Name;
      34             :   unsigned NumUnits; // Number of resources of this kind.
      35             :   unsigned SuperIdx; // Index of the resource kind that contains this kind.
      36             : 
      37             :   // Number of resources that may be buffered.
      38             :   //
      39             :   // Buffered resources (BufferSize != 0) may be consumed at some indeterminate
      40             :   // cycle after dispatch. This should be used for out-of-order cpus when
      41             :   // instructions that use this resource can be buffered in a reservation
      42             :   // station.
      43             :   //
      44             :   // Unbuffered resources (BufferSize == 0) always consume their resource some
      45             :   // fixed number of cycles after dispatch. If a resource is unbuffered, then
      46             :   // the scheduler will avoid scheduling instructions with conflicting resources
      47             :   // in the same cycle. This is for in-order cpus, or the in-order portion of
      48             :   // an out-of-order cpu.
      49             :   int BufferSize;
      50             : 
      51             :   // If the resource has sub-units, a pointer to the first element of an array
      52             :   // of `NumUnits` elements containing the ProcResourceIdx of the sub units.
      53             :   // nullptr if the resource does not have sub-units.
      54             :   const unsigned *SubUnitsIdxBegin;
      55             : 
      56             :   bool operator==(const MCProcResourceDesc &Other) const {
      57             :     return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx
      58             :       && BufferSize == Other.BufferSize;
      59             :   }
      60             : };
      61             : 
      62             : /// Identify one of the processor resource kinds consumed by a particular
      63             : /// scheduling class for the specified number of cycles.
      64             : struct MCWriteProcResEntry {
      65             :   uint16_t ProcResourceIdx;
      66             :   uint16_t Cycles;
      67             : 
      68             :   bool operator==(const MCWriteProcResEntry &Other) const {
      69    14873165 :     return ProcResourceIdx == Other.ProcResourceIdx && Cycles == Other.Cycles;
      70             :   }
      71             : };
      72             : 
      73             : /// Specify the latency in cpu cycles for a particular scheduling class and def
      74             : /// index. -1 indicates an invalid latency. Heuristics would typically consider
      75             : /// an instruction with invalid latency to have infinite latency.  Also identify
      76             : /// the WriteResources of this def. When the operand expands to a sequence of
      77             : /// writes, this ID is the last write in the sequence.
      78             : struct MCWriteLatencyEntry {
      79             :   int16_t Cycles;
      80             :   uint16_t WriteResourceID;
      81             : 
      82             :   bool operator==(const MCWriteLatencyEntry &Other) const {
      83     1497559 :     return Cycles == Other.Cycles && WriteResourceID == Other.WriteResourceID;
      84             :   }
      85             : };
      86             : 
      87             : /// Specify the number of cycles allowed after instruction issue before a
      88             : /// particular use operand reads its registers. This effectively reduces the
      89             : /// write's latency. Here we allow negative cycles for corner cases where
      90             : /// latency increases. This rule only applies when the entry's WriteResource
      91             : /// matches the write's WriteResource.
      92             : ///
      93             : /// MCReadAdvanceEntries are sorted first by operand index (UseIdx), then by
      94             : /// WriteResourceID.
      95             : struct MCReadAdvanceEntry {
      96             :   unsigned UseIdx;
      97             :   unsigned WriteResourceID;
      98             :   int Cycles;
      99             : 
     100             :   bool operator==(const MCReadAdvanceEntry &Other) const {
     101      254961 :     return UseIdx == Other.UseIdx && WriteResourceID == Other.WriteResourceID
     102      187418 :       && Cycles == Other.Cycles;
     103             :   }
     104             : };
     105             : 
     106             : /// Summarize the scheduling resources required for an instruction of a
     107             : /// particular scheduling class.
     108             : ///
     109             : /// Defined as an aggregate struct for creating tables with initializer lists.
     110             : struct MCSchedClassDesc {
     111             :   static const unsigned short InvalidNumMicroOps = (1U << 14) - 1;
     112             :   static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1;
     113             : 
     114             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     115             :   const char* Name;
     116             : #endif
     117             :   uint16_t NumMicroOps : 14;
     118             :   bool     BeginGroup : 1;
     119             :   bool     EndGroup : 1;
     120             :   uint16_t WriteProcResIdx; // First index into WriteProcResTable.
     121             :   uint16_t NumWriteProcResEntries;
     122             :   uint16_t WriteLatencyIdx; // First index into WriteLatencyTable.
     123             :   uint16_t NumWriteLatencyEntries;
     124             :   uint16_t ReadAdvanceIdx; // First index into ReadAdvanceTable.
     125             :   uint16_t NumReadAdvanceEntries;
     126             : 
     127             :   bool isValid() const {
     128    25534748 :     return NumMicroOps != InvalidNumMicroOps;
     129             :   }
     130             :   bool isVariant() const {
     131    14836289 :     return NumMicroOps == VariantNumMicroOps;
     132             :   }
     133             : };
     134             : 
     135             : /// Specify the cost of a register definition in terms of number of physical
     136             : /// registers allocated at register renaming stage. For example, AMD Jaguar
     137             : /// natively supports 128-bit data types, and operations on 256-bit registers
     138             : /// (i.e. YMM registers) are internally split into two COPs (complex operations)
     139             : /// and each COP updates a physical register. Basically, on Jaguar, a YMM
     140             : /// register write effectively consumes two physical registers. That means,
     141             : /// the cost of a YMM write in the BtVer2 model is 2.
     142             : struct MCRegisterCostEntry {
     143             :   unsigned RegisterClassID;
     144             :   unsigned Cost;
     145             : };
     146             : 
     147             : /// A register file descriptor.
     148             : ///
     149             : /// This struct describes processor register files. In particular, it
     150             : /// helps describe the size of the register file, as well as the cost of
     151             : /// allocating a register file at register renaming stage.
     152             : /// FIXME: this struct can be extended to provide information about the number
     153             : /// of read/write ports to the register file.  A value of zero for field
     154             : /// 'NumPhysRegs' means: this register file has an unbounded number of physical
     155             : /// registers.
     156             : struct MCRegisterFileDesc {
     157             :   const char *Name;
     158             :   uint16_t NumPhysRegs;
     159             :   uint16_t NumRegisterCostEntries;
     160             :   // Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
     161             :   uint16_t RegisterCostEntryIdx;
     162             : };
     163             : 
     164             : /// Provide extra details about the machine processor.
     165             : ///
     166             : /// This is a collection of "optional" processor information that is not
     167             : /// normally used by the LLVM machine schedulers, but that can be consumed by
     168             : /// external tools like llvm-mca to improve the quality of the performance
     169             : /// analysis.
     170             : struct MCExtraProcessorInfo {
     171             :   // Actual size of the reorder buffer in hardware.
     172             :   unsigned ReorderBufferSize;
     173             :   // Number of instructions retired per cycle.
     174             :   unsigned MaxRetirePerCycle;
     175             :   const MCRegisterFileDesc *RegisterFiles;
     176             :   unsigned NumRegisterFiles;
     177             :   const MCRegisterCostEntry *RegisterCostTable;
     178             :   unsigned NumRegisterCostEntries;
     179             : 
     180             :   struct PfmCountersInfo {
     181             :     // An optional name of a performance counter that can be used to measure
     182             :     // cycles.
     183             :     const char *CycleCounter;
     184             : 
     185             :     // For each MCProcResourceDesc defined by the processor, an optional list of
     186             :     // names of performance counters that can be used to measure the resource
     187             :     // utilization.
     188             :     const char **IssueCounters;
     189             :   };
     190             :   PfmCountersInfo PfmCounters;
     191             : };
     192             : 
     193             : /// Machine model for scheduling, bundling, and heuristics.
     194             : ///
     195             : /// The machine model directly provides basic information about the
     196             : /// microarchitecture to the scheduler in the form of properties. It also
     197             : /// optionally refers to scheduler resource tables and itinerary
     198             : /// tables. Scheduler resource tables model the latency and cost for each
     199             : /// instruction type. Itinerary tables are an independent mechanism that
     200             : /// provides a detailed reservation table describing each cycle of instruction
     201             : /// execution. Subtargets may define any or all of the above categories of data
     202             : /// depending on the type of CPU and selected scheduler.
     203             : ///
     204             : /// The machine independent properties defined here are used by the scheduler as
     205             : /// an abstract machine model. A real micro-architecture has a number of
     206             : /// buffers, queues, and stages. Declaring that a given machine-independent
     207             : /// abstract property corresponds to a specific physical property across all
     208             : /// subtargets can't be done. Nonetheless, the abstract model is
     209             : /// useful. Furthermore, subtargets typically extend this model with processor
     210             : /// specific resources to model any hardware features that can be exploited by
     211             : /// scheduling heuristics and aren't sufficiently represented in the abstract.
     212             : ///
     213             : /// The abstract pipeline is built around the notion of an "issue point". This
     214             : /// is merely a reference point for counting machine cycles. The physical
     215             : /// machine will have pipeline stages that delay execution. The scheduler does
     216             : /// not model those delays because they are irrelevant as long as they are
     217             : /// consistent. Inaccuracies arise when instructions have different execution
     218             : /// delays relative to each other, in addition to their intrinsic latency. Those
     219             : /// special cases can be handled by TableGen constructs such as ReadAdvance,
     220             : /// which reduces latency when reading data, and ResourceCycles, which consumes
     221             : /// a processor resource when writing data for a number of abstract
     222             : /// cycles.
     223             : ///
     224             : /// TODO: One tool currently missing is the ability to add a delay to
     225             : /// ResourceCycles. That would be easy to add and would likely cover all cases
     226             : /// currently handled by the legacy itinerary tables.
     227             : ///
     228             : /// A note on out-of-order execution and, more generally, instruction
     229             : /// buffers. Part of the CPU pipeline is always in-order. The issue point, which
     230             : /// is the point of reference for counting cycles, only makes sense as an
     231             : /// in-order part of the pipeline. Other parts of the pipeline are sometimes
     232             : /// falling behind and sometimes catching up. It's only interesting to model
     233             : /// those other, decoupled parts of the pipeline if they may be predictably
     234             : /// resource constrained in a way that the scheduler can exploit.
     235             : ///
     236             : /// The LLVM machine model distinguishes between in-order constraints and
     237             : /// out-of-order constraints so that the target's scheduling strategy can apply
     238             : /// appropriate heuristics. For a well-balanced CPU pipeline, out-of-order
     239             : /// resources would not typically be treated as a hard scheduling
     240             : /// constraint. For example, in the GenericScheduler, a delay caused by limited
     241             : /// out-of-order resources is not directly reflected in the number of cycles
     242             : /// that the scheduler sees between issuing an instruction and its dependent
     243             : /// instructions. In other words, out-of-order resources don't directly increase
     244             : /// the latency between pairs of instructions. However, they can still be used
     245             : /// to detect potential bottlenecks across a sequence of instructions and bias
     246             : /// the scheduling heuristics appropriately.
     247             : struct MCSchedModel {
     248             :   // IssueWidth is the maximum number of instructions that may be scheduled in
     249             :   // the same per-cycle group. This is meant to be a hard in-order constraint
     250             :   // (a.k.a. "hazard"). In the GenericScheduler strategy, no more than
     251             :   // IssueWidth micro-ops can ever be scheduled in a particular cycle.
     252             :   //
     253             :   // In practice, IssueWidth is useful to model any bottleneck between the
     254             :   // decoder (after micro-op expansion) and the out-of-order reservation
     255             :   // stations or the decoder bandwidth itself. If the total number of
     256             :   // reservation stations is also a bottleneck, or if any other pipeline stage
     257             :   // has a bandwidth limitation, then that can be naturally modeled by adding an
     258             :   // out-of-order processor resource.
     259             :   unsigned IssueWidth;
     260             :   static const unsigned DefaultIssueWidth = 1;
     261             : 
     262             :   // MicroOpBufferSize is the number of micro-ops that the processor may buffer
     263             :   // for out-of-order execution.
     264             :   //
     265             :   // "0" means operations that are not ready in this cycle are not considered
     266             :   // for scheduling (they go in the pending queue). Latency is paramount. This
     267             :   // may be more efficient if many instructions are pending in a schedule.
     268             :   //
     269             :   // "1" means all instructions are considered for scheduling regardless of
     270             :   // whether they are ready in this cycle. Latency still causes issue stalls,
     271             :   // but we balance those stalls against other heuristics.
     272             :   //
     273             :   // "> 1" means the processor is out-of-order. This is a machine independent
     274             :   // estimate of highly machine specific characteristics such as the register
     275             :   // renaming pool and reorder buffer.
     276             :   unsigned MicroOpBufferSize;
     277             :   static const unsigned DefaultMicroOpBufferSize = 0;
     278             : 
     279             :   // LoopMicroOpBufferSize is the number of micro-ops that the processor may
     280             :   // buffer for optimized loop execution. More generally, this represents the
     281             :   // optimal number of micro-ops in a loop body. A loop may be partially
     282             :   // unrolled to bring the count of micro-ops in the loop body closer to this
     283             :   // number.
     284             :   unsigned LoopMicroOpBufferSize;
     285             :   static const unsigned DefaultLoopMicroOpBufferSize = 0;
     286             : 
     287             :   // LoadLatency is the expected latency of load instructions.
     288             :   unsigned LoadLatency;
     289             :   static const unsigned DefaultLoadLatency = 4;
     290             : 
     291             :   // HighLatency is the expected latency of "very high latency" operations.
     292             :   // See TargetInstrInfo::isHighLatencyDef().
     293             :   // By default, this is set to an arbitrarily high number of cycles
     294             :   // likely to have some impact on scheduling heuristics.
     295             :   unsigned HighLatency;
     296             :   static const unsigned DefaultHighLatency = 10;
     297             : 
     298             :   // MispredictPenalty is the typical number of extra cycles the processor
     299             :   // takes to recover from a branch misprediction.
     300             :   unsigned MispredictPenalty;
     301             :   static const unsigned DefaultMispredictPenalty = 10;
     302             : 
     303             :   bool PostRAScheduler; // default value is false
     304             : 
     305             :   bool CompleteModel;
     306             : 
     307             :   unsigned ProcID;
     308             :   const MCProcResourceDesc *ProcResourceTable;
     309             :   const MCSchedClassDesc *SchedClassTable;
     310             :   unsigned NumProcResourceKinds;
     311             :   unsigned NumSchedClasses;
     312             :   // Instruction itinerary tables used by InstrItineraryData.
     313             :   friend class InstrItineraryData;
     314             :   const InstrItinerary *InstrItineraries;
     315             : 
     316             :   const MCExtraProcessorInfo *ExtraProcessorInfo;
     317             : 
     318             :   bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; }
     319             : 
     320             :   unsigned getProcessorID() const { return ProcID; }
     321             : 
     322             :   /// Does this machine model include instruction-level scheduling.
     323             :   bool hasInstrSchedModel() const { return SchedClassTable; }
     324             : 
     325             :   const MCExtraProcessorInfo &getExtraProcessorInfo() const {
     326             :     assert(hasExtraProcessorInfo() &&
     327             :            "No extra information available for this model");
     328             :     return *ExtraProcessorInfo;
     329             :   }
     330             : 
     331             :   /// Return true if this machine model includes data for all instructions
     332             :   /// with a scheduling class (itinerary class or SchedRW list).
     333             :   bool isComplete() const { return CompleteModel; }
     334             : 
     335             :   /// Return true if machine supports out of order execution.
     336       16272 :   bool isOutOfOrder() const { return MicroOpBufferSize > 1; }
     337             : 
     338             :   unsigned getNumProcResourceKinds() const {
     339             :     return NumProcResourceKinds;
     340             :   }
     341             : 
     342             :   const MCProcResourceDesc *getProcResource(unsigned ProcResourceIdx) const {
     343             :     assert(hasInstrSchedModel() && "No scheduling machine model");
     344             : 
     345             :     assert(ProcResourceIdx < NumProcResourceKinds && "bad proc resource idx");
     346    14618730 :     return &ProcResourceTable[ProcResourceIdx];
     347             :   }
     348             : 
     349             :   const MCSchedClassDesc *getSchedClassDesc(unsigned SchedClassIdx) const {
     350             :     assert(hasInstrSchedModel() && "No scheduling machine model");
     351             : 
     352             :     assert(SchedClassIdx < NumSchedClasses && "bad scheduling class idx");
     353    15044025 :     return &SchedClassTable[SchedClassIdx];
     354             :   }
     355             : 
     356             :   /// Returns the latency value for the scheduling class.
     357             :   static int computeInstrLatency(const MCSubtargetInfo &STI,
     358             :                                  const MCSchedClassDesc &SCDesc);
     359             : 
     360             :   int computeInstrLatency(const MCSubtargetInfo &STI, unsigned SClass) const;
     361             :   int computeInstrLatency(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
     362             :                           const MCInst &Inst) const;
     363             : 
     364             :   // Returns the reciprocal throughput information from a MCSchedClassDesc.
     365             :   static double
     366             :   getReciprocalThroughput(const MCSubtargetInfo &STI,
     367             :                           const MCSchedClassDesc &SCDesc);
     368             : 
     369             :   static double
     370             :   getReciprocalThroughput(unsigned SchedClass, const InstrItineraryData &IID);
     371             : 
     372             :   double
     373             :   getReciprocalThroughput(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
     374             :                           const MCInst &Inst) const;
     375             : 
     376             :   /// Returns the default initialized model.
     377             :   static const MCSchedModel &GetDefaultSchedModel() { return Default; }
     378             :   static const MCSchedModel Default;
     379             : };
     380             : 
     381             : } // namespace llvm
     382             : 
     383             : #endif

Generated by: LCOV version 1.13