LCOV - code coverage report
Current view: top level - include/llvm/MC - MCSchedule.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 9 9 100.0 %
Date: 2018-05-20 00:06:23 Functions: 0 0 -
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- llvm/MC/MCSchedule.h - Scheduling -----------------------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines the classes used to describe a subtarget's machine model
      11             : // for scheduling and other instruction cost heuristics.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #ifndef LLVM_MC_MCSCHEDULE_H
      16             : #define LLVM_MC_MCSCHEDULE_H
      17             : 
      18             : #include "llvm/ADT/Optional.h"
      19             : #include "llvm/Config/llvm-config.h"
      20             : #include "llvm/Support/DataTypes.h"
      21             : #include <cassert>
      22             : 
      23             : namespace llvm {
      24             : 
      25             : struct InstrItinerary;
      26             : class MCSubtargetInfo;
      27             : class MCInstrInfo;
      28             : class InstrItineraryData;
      29             : 
      30             : /// Define a kind of processor resource that will be modeled by the scheduler.
      31             : struct MCProcResourceDesc {
      32             :   const char *Name;
      33             :   unsigned NumUnits; // Number of resources of this kind
      34             :   unsigned SuperIdx; // Index of the resource kind that contains this kind.
      35             : 
      36             :   // Number of resources that may be buffered.
      37             :   //
      38             :   // Buffered resources (BufferSize != 0) may be consumed at some indeterminate
      39             :   // cycle after dispatch. This should be used for out-of-order cpus when
      40             :   // instructions that use this resource can be buffered in a reservation
      41             :   // station.
      42             :   //
      43             :   // Unbuffered resources (BufferSize == 0) always consume their resource some
      44             :   // fixed number of cycles after dispatch. If a resource is unbuffered, then
      45             :   // the scheduler will avoid scheduling instructions with conflicting resources
      46             :   // in the same cycle. This is for in-order cpus, or the in-order portion of
      47             :   // an out-of-order cpu.
      48             :   int BufferSize;
      49             : 
      50             :   // If the resource has sub-units, a pointer to the first element of an array
      51             :   // of `NumUnits` elements containing the ProcResourceIdx of the sub units.
      52             :   // nullptr if the resource does not have sub-units.
      53             :   const unsigned *SubUnitsIdxBegin;
      54             : 
      55             :   bool operator==(const MCProcResourceDesc &Other) const {
      56             :     return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx
      57             :       && BufferSize == Other.BufferSize;
      58             :   }
      59             : };
      60             : 
      61             : /// Identify one of the processor resource kinds consumed by a particular
      62             : /// scheduling class for the specified number of cycles.
      63             : struct MCWriteProcResEntry {
      64             :   uint16_t ProcResourceIdx;
      65             :   uint16_t Cycles;
      66             : 
      67             :   bool operator==(const MCWriteProcResEntry &Other) const {
      68    15007588 :     return ProcResourceIdx == Other.ProcResourceIdx && Cycles == Other.Cycles;
      69             :   }
      70             : };
      71             : 
      72             : /// Specify the latency in cpu cycles for a particular scheduling class and def
      73             : /// index. -1 indicates an invalid latency. Heuristics would typically consider
      74             : /// an instruction with invalid latency to have infinite latency.  Also identify
      75             : /// the WriteResources of this def. When the operand expands to a sequence of
      76             : /// writes, this ID is the last write in the sequence.
      77             : struct MCWriteLatencyEntry {
      78             :   int16_t Cycles;
      79             :   uint16_t WriteResourceID;
      80             : 
      81             :   bool operator==(const MCWriteLatencyEntry &Other) const {
      82     1504563 :     return Cycles == Other.Cycles && WriteResourceID == Other.WriteResourceID;
      83             :   }
      84             : };
      85             : 
      86             : /// Specify the number of cycles allowed after instruction issue before a
      87             : /// particular use operand reads its registers. This effectively reduces the
      88             : /// write's latency. Here we allow negative cycles for corner cases where
      89             : /// latency increases. This rule only applies when the entry's WriteResource
      90             : /// matches the write's WriteResource.
      91             : ///
      92             : /// MCReadAdvanceEntries are sorted first by operand index (UseIdx), then by
      93             : /// WriteResourceIdx.
      94             : struct MCReadAdvanceEntry {
      95             :   unsigned UseIdx;
      96             :   unsigned WriteResourceID;
      97             :   int Cycles;
      98             : 
      99             :   bool operator==(const MCReadAdvanceEntry &Other) const {
     100      255322 :     return UseIdx == Other.UseIdx && WriteResourceID == Other.WriteResourceID
     101      187779 :       && Cycles == Other.Cycles;
     102             :   }
     103             : };
     104             : 
     105             : /// Summarize the scheduling resources required for an instruction of a
     106             : /// particular scheduling class.
     107             : ///
     108             : /// Defined as an aggregate struct for creating tables with initializer lists.
     109             : struct MCSchedClassDesc {
     110             :   static const unsigned short InvalidNumMicroOps = (1U << 14) - 1;
     111             :   static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1;
     112             : 
     113             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     114             :   const char* Name;
     115             : #endif
     116             :   uint16_t NumMicroOps : 14;
     117             :   bool     BeginGroup : 1;
     118             :   bool     EndGroup : 1;
     119             :   uint16_t WriteProcResIdx; // First index into WriteProcResTable.
     120             :   uint16_t NumWriteProcResEntries;
     121             :   uint16_t WriteLatencyIdx; // First index into WriteLatencyTable.
     122             :   uint16_t NumWriteLatencyEntries;
     123             :   uint16_t ReadAdvanceIdx; // First index into ReadAdvanceTable.
     124             :   uint16_t NumReadAdvanceEntries;
     125             : 
     126             :   bool isValid() const {
     127    24902351 :     return NumMicroOps != InvalidNumMicroOps;
     128             :   }
     129             :   bool isVariant() const {
     130    14225492 :     return NumMicroOps == VariantNumMicroOps;
     131             :   }
     132             : };
     133             : 
     134             : /// Specify the cost of a register definition in terms of number of physical
     135             : /// registers allocated at register renaming stage. For example, AMD Jaguar
     136             : /// natively supports 128-bit data types, and operations on 256-bit registers
     137             : /// (i.e. YMM registers) are internally split into two COPs (complex operations)
     138             : /// and each COP updates a physical register. Basically, on Jaguar, a YMM
     139             : /// register write effectively consumes two physical registers. That means,
     140             : /// the cost of a YMM write in the BtVer2 model is 2.
     141             : struct MCRegisterCostEntry {
     142             :   unsigned RegisterClassID;
     143             :   unsigned Cost;
     144             : };
     145             : 
     146             : /// A register file descriptor.
     147             : ///
     148             : /// This struct allows to describe processor register files. In particular, it
     149             : /// helps describing the size of the register file, as well as the cost of
     150             : /// allocating a register file at register renaming stage.
     151             : /// FIXME: this struct can be extended to provide information about the number
     152             : /// of read/write ports to the register file.  A value of zero for field
     153             : /// 'NumPhysRegs' means: this register file has an unbounded number of physical
     154             : /// registers.
     155             : struct MCRegisterFileDesc {
     156             :   const char *Name;
     157             :   uint16_t NumPhysRegs;
     158             :   uint16_t NumRegisterCostEntries;
     159             :   // Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
     160             :   uint16_t RegisterCostEntryIdx;
     161             : };
     162             : 
     163             : /// Provide extra details about the machine processor.
     164             : ///
     165             : /// This is a collection of "optional" processor information that is not
     166             : /// normally used by the LLVM machine schedulers, but that can be consumed by
     167             : /// external tools like llvm-mca to improve the quality of the performance
     168             : /// analysis.
     169             : struct MCExtraProcessorInfo {
     170             :   // Actual size of the reorder buffer in hardware.
     171             :   unsigned ReorderBufferSize;
     172             :   // Number of instructions retired per cycle.
     173             :   unsigned MaxRetirePerCycle;
     174             :   const MCRegisterFileDesc *RegisterFiles;
     175             :   unsigned NumRegisterFiles;
     176             :   const MCRegisterCostEntry *RegisterCostTable;
     177             :   unsigned NumRegisterCostEntries;
     178             : 
     179             :   struct PfmCountersInfo {
     180             :     // An optional name of a performance counter that can be used to measure
     181             :     // cycles.
     182             :     const char *CycleCounter;
     183             : 
     184             :     // For each MCProcResourceDesc defined by the processor, an optional list of
     185             :     // names of performance counters that can be used to measure the resource
     186             :     // utilization.
     187             :     const char **IssueCounters;
     188             :   };
     189             :   PfmCountersInfo PfmCounters;
     190             : };
     191             : 
     192             : /// Machine model for scheduling, bundling, and heuristics.
     193             : ///
     194             : /// The machine model directly provides basic information about the
     195             : /// microarchitecture to the scheduler in the form of properties. It also
     196             : /// optionally refers to scheduler resource tables and itinerary
     197             : /// tables. Scheduler resource tables model the latency and cost for each
     198             : /// instruction type. Itinerary tables are an independent mechanism that
     199             : /// provides a detailed reservation table describing each cycle of instruction
     200             : /// execution. Subtargets may define any or all of the above categories of data
     201             : /// depending on the type of CPU and selected scheduler.
     202             : struct MCSchedModel {
     203             :   // IssueWidth is the maximum number of instructions that may be scheduled in
     204             :   // the same per-cycle group.
     205             :   unsigned IssueWidth;
     206             :   static const unsigned DefaultIssueWidth = 1;
     207             : 
     208             :   // MicroOpBufferSize is the number of micro-ops that the processor may buffer
     209             :   // for out-of-order execution.
     210             :   //
     211             :   // "0" means operations that are not ready in this cycle are not considered
     212             :   // for scheduling (they go in the pending queue). Latency is paramount. This
     213             :   // may be more efficient if many instructions are pending in a schedule.
     214             :   //
     215             :   // "1" means all instructions are considered for scheduling regardless of
     216             :   // whether they are ready in this cycle. Latency still causes issue stalls,
     217             :   // but we balance those stalls against other heuristics.
     218             :   //
     219             :   // "> 1" means the processor is out-of-order. This is a machine independent
     220             :   // estimate of highly machine specific characteristics such as the register
     221             :   // renaming pool and reorder buffer.
     222             :   unsigned MicroOpBufferSize;
     223             :   static const unsigned DefaultMicroOpBufferSize = 0;
     224             : 
     225             :   // LoopMicroOpBufferSize is the number of micro-ops that the processor may
     226             :   // buffer for optimized loop execution. More generally, this represents the
     227             :   // optimal number of micro-ops in a loop body. A loop may be partially
     228             :   // unrolled to bring the count of micro-ops in the loop body closer to this
     229             :   // number.
     230             :   unsigned LoopMicroOpBufferSize;
     231             :   static const unsigned DefaultLoopMicroOpBufferSize = 0;
     232             : 
     233             :   // LoadLatency is the expected latency of load instructions.
     234             :   unsigned LoadLatency;
     235             :   static const unsigned DefaultLoadLatency = 4;
     236             : 
     237             :   // HighLatency is the expected latency of "very high latency" operations.
     238             :   // See TargetInstrInfo::isHighLatencyDef().
     239             :   // By default, this is set to an arbitrarily high number of cycles
     240             :   // likely to have some impact on scheduling heuristics.
     241             :   unsigned HighLatency;
     242             :   static const unsigned DefaultHighLatency = 10;
     243             : 
     244             :   // MispredictPenalty is the typical number of extra cycles the processor
     245             :   // takes to recover from a branch misprediction.
     246             :   unsigned MispredictPenalty;
     247             :   static const unsigned DefaultMispredictPenalty = 10;
     248             : 
     249             :   bool PostRAScheduler; // default value is false
     250             : 
     251             :   bool CompleteModel;
     252             : 
     253             :   unsigned ProcID;
     254             :   const MCProcResourceDesc *ProcResourceTable;
     255             :   const MCSchedClassDesc *SchedClassTable;
     256             :   unsigned NumProcResourceKinds;
     257             :   unsigned NumSchedClasses;
     258             :   // Instruction itinerary tables used by InstrItineraryData.
     259             :   friend class InstrItineraryData;
     260             :   const InstrItinerary *InstrItineraries;
     261             : 
     262             :   const MCExtraProcessorInfo *ExtraProcessorInfo;
     263             : 
     264             :   bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; }
     265             : 
     266             :   unsigned getProcessorID() const { return ProcID; }
     267             : 
     268             :   /// Does this machine model include instruction-level scheduling.
     269             :   bool hasInstrSchedModel() const { return SchedClassTable; }
     270             : 
     271             :   const MCExtraProcessorInfo &getExtraProcessorInfo() const {
     272             :     assert(hasExtraProcessorInfo() &&
     273             :            "No extra information available for this model");
     274             :     return *ExtraProcessorInfo;
     275             :   }
     276             : 
     277             :   /// Return true if this machine model includes data for all instructions
     278             :   /// with a scheduling class (itinerary class or SchedRW list).
     279             :   bool isComplete() const { return CompleteModel; }
     280             : 
     281             :   /// Return true if machine supports out of order execution.
     282       16463 :   bool isOutOfOrder() const { return MicroOpBufferSize > 1; }
     283             : 
     284             :   unsigned getNumProcResourceKinds() const {
     285             :     return NumProcResourceKinds;
     286             :   }
     287             : 
     288             :   const MCProcResourceDesc *getProcResource(unsigned ProcResourceIdx) const {
     289             :     assert(hasInstrSchedModel() && "No scheduling machine model");
     290             : 
     291             :     assert(ProcResourceIdx < NumProcResourceKinds && "bad proc resource idx");
     292    15313140 :     return &ProcResourceTable[ProcResourceIdx];
     293             :   }
     294             : 
     295             :   const MCSchedClassDesc *getSchedClassDesc(unsigned SchedClassIdx) const {
     296             :     assert(hasInstrSchedModel() && "No scheduling machine model");
     297             : 
     298             :     assert(SchedClassIdx < NumSchedClasses && "bad scheduling class idx");
     299    14463246 :     return &SchedClassTable[SchedClassIdx];
     300             :   }
     301             : 
     302             :   /// Returns the latency value for the scheduling class.
     303             :   static int computeInstrLatency(const MCSubtargetInfo &STI,
     304             :                                  const MCSchedClassDesc &SCDesc);
     305             : 
     306             :   int computeInstrLatency(const MCSubtargetInfo &STI, unsigned SClass) const;
     307             : 
     308             :   // Returns the reciprocal throughput information from a MCSchedClassDesc.
     309             :   static Optional<double>
     310             :   getReciprocalThroughput(const MCSubtargetInfo &STI,
     311             :                           const MCSchedClassDesc &SCDesc);
     312             : 
     313             :   static Optional<double>
     314             :   getReciprocalThroughput(unsigned SchedClass, const InstrItineraryData &IID);
     315             : 
     316             :   /// Returns the default initialized model.
     317             :   static const MCSchedModel &GetDefaultSchedModel() { return Default; }
     318             :   static const MCSchedModel Default;
     319             : };
     320             : 
     321             : } // namespace llvm
     322             : 
     323             : #endif

Generated by: LCOV version 1.13