LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineScheduler.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 45 62 72.6 %
Date: 2018-10-20 13:21:21 Functions: 9 25 36.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// SI Machine Scheduler interface
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
      16             : #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
      17             : 
      18             : #include "SIInstrInfo.h"
      19             : #include "llvm/CodeGen/MachineBasicBlock.h"
      20             : #include "llvm/CodeGen/MachineScheduler.h"
      21             : #include "llvm/CodeGen/RegisterPressure.h"
      22             : #include "llvm/CodeGen/ScheduleDAG.h"
      23             : #include <cassert>
      24             : #include <cstdint>
      25             : #include <map>
      26             : #include <memory>
      27             : #include <set>
      28             : #include <vector>
      29             : 
      30             : namespace llvm {
      31             : 
      32             : enum SIScheduleCandReason {
      33             :   NoCand,
      34             :   RegUsage,
      35             :   Latency,
      36             :   Successor,
      37             :   Depth,
      38             :   NodeOrder
      39             : };
      40             : 
      41             : struct SISchedulerCandidate {
      42             :   // The reason for this candidate.
      43             :   SIScheduleCandReason Reason = NoCand;
      44             : 
      45             :   // Set of reasons that apply to multiple candidates.
      46             :   uint32_t RepeatReasonSet = 0;
      47             : 
      48          25 :   SISchedulerCandidate() = default;
      49             : 
      50             :   bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
      51          15 :   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
      52             : };
      53             : 
      54             : class SIScheduleDAGMI;
      55             : class SIScheduleBlockCreator;
      56             : 
      57             : enum SIScheduleBlockLinkKind {
      58             :   NoData,
      59             :   Data
      60             : };
      61             : 
      62             : class SIScheduleBlock {
      63             :   SIScheduleDAGMI *DAG;
      64             :   SIScheduleBlockCreator *BC;
      65             : 
      66             :   std::vector<SUnit*> SUnits;
      67             :   std::map<unsigned, unsigned> NodeNum2Index;
      68             :   std::vector<SUnit*> TopReadySUs;
      69             :   std::vector<SUnit*> ScheduledSUnits;
      70             : 
      71             :   /// The top of the unscheduled zone.
      72             :   IntervalPressure TopPressure;
      73             :   RegPressureTracker TopRPTracker;
      74             : 
      75             :   // Pressure: number of said class of registers needed to
      76             :   // store the live virtual and real registers.
      77             :   // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
      78             :   // Pressure of additional registers required inside the block.
      79             :   std::vector<unsigned> InternalAdditionnalPressure;
      80             :   // Pressure of input and output registers
      81             :   std::vector<unsigned> LiveInPressure;
      82             :   std::vector<unsigned> LiveOutPressure;
      83             :   // Registers required by the block, and outputs.
      84             :   // We do track only virtual registers.
      85             :   // Note that some registers are not 32 bits,
      86             :   // and thus the pressure is not equal
      87             :   // to the number of live registers.
      88             :   std::set<unsigned> LiveInRegs;
      89             :   std::set<unsigned> LiveOutRegs;
      90             : 
      91             :   bool Scheduled = false;
      92             :   bool HighLatencyBlock = false;
      93             : 
      94             :   std::vector<unsigned> HasLowLatencyNonWaitedParent;
      95             : 
      96             :   // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
      97             :   unsigned ID;
      98             : 
      99             :   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
     100             :   // All blocks successors, and the kind of link
     101             :   std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
     102             :   unsigned NumHighLatencySuccessors = 0;
     103             : 
     104             : public:
     105           5 :   SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
     106           5 :                   unsigned ID):
     107          20 :     DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
     108             : 
     109          10 :   ~SIScheduleBlock() = default;
     110             : 
     111           0 :   unsigned getID() const { return ID; }
     112             : 
     113             :   /// Functions for Block construction.
     114             :   void addUnit(SUnit *SU);
     115             : 
     116             :   // When all SUs have been added.
     117             :   void finalizeUnits();
     118             : 
     119             :   // Add block pred, which has instruction predecessor of SU.
     120             :   void addPred(SIScheduleBlock *Pred);
     121             :   void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
     122             : 
     123             :   const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
     124             :   ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
     125             :     getSuccs() const { return Succs; }
     126             : 
     127             :   unsigned Height;  // Maximum topdown path length to block without outputs
     128             :   unsigned Depth;   // Maximum bottomup path length to block without inputs
     129             : 
     130           0 :   unsigned getNumHighLatencySuccessors() const {
     131           0 :     return NumHighLatencySuccessors;
     132             :   }
     133             : 
     134           0 :   bool isHighLatencyBlock() { return HighLatencyBlock; }
     135             : 
     136             :   // This is approximative.
     137             :   // Ideally should take into accounts some instructions (rcp, etc)
     138             :   // are 4 times slower.
     139           8 :   int getCost() { return SUnits.size(); }
     140             : 
     141             :   // The block Predecessors and Successors must be all registered
     142             :   // before fastSchedule().
     143             :   // Fast schedule with no particular requirement.
     144             :   void fastSchedule();
     145             : 
     146          15 :   std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
     147             : 
     148             :   // Complete schedule that will try to minimize reg pressure and
     149             :   // low latencies, and will fill liveins and liveouts.
     150             :   // Needs all MIs to be grouped between BeginBlock and EndBlock.
     151             :   // The MIs can be moved after the scheduling,
     152             :   // it is just used to allow correct track of live registers.
     153             :   void schedule(MachineBasicBlock::iterator BeginBlock,
     154             :                 MachineBasicBlock::iterator EndBlock);
     155             : 
     156             :   bool isScheduled() { return Scheduled; }
     157             : 
     158             :   // Needs the block to be scheduled inside
     159             :   // TODO: find a way to compute it.
     160             :   std::vector<unsigned> &getInternalAdditionnalRegUsage() {
     161             :     return InternalAdditionnalPressure;
     162             :   }
     163             : 
     164           5 :   std::set<unsigned> &getInRegs() { return LiveInRegs; }
     165          10 :   std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
     166             : 
     167             :   void printDebug(bool Full);
     168             : 
     169             : private:
     170             :   struct SISchedCandidate : SISchedulerCandidate {
     171             :     // The best SUnit candidate.
     172             :     SUnit *SU = nullptr;
     173             : 
     174             :     unsigned SGPRUsage;
     175             :     unsigned VGPRUsage;
     176             :     bool IsLowLatency;
     177             :     unsigned LowLatencyOffset;
     178             :     bool HasLowLatencyNonWaitedParent;
     179             : 
     180          16 :     SISchedCandidate() = default;
     181             : 
     182           0 :     bool isValid() const { return SU; }
     183             : 
     184             :     // Copy the status of another candidate without changing policy.
     185             :     void setBest(SISchedCandidate &Best) {
     186             :       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
     187          19 :       SU = Best.SU;
     188          19 :       Reason = Best.Reason;
     189          19 :       SGPRUsage = Best.SGPRUsage;
     190          19 :       VGPRUsage = Best.VGPRUsage;
     191          19 :       IsLowLatency = Best.IsLowLatency;
     192          19 :       LowLatencyOffset = Best.LowLatencyOffset;
     193          19 :       HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
     194             :     }
     195             :   };
     196             : 
     197             :   void undoSchedule();
     198             : 
     199             :   void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
     200             :   void releaseSucc(SUnit *SU, SDep *SuccEdge);
     201             :   // InOrOutBlock: restrict to links pointing inside the block (true),
     202             :   // or restrict to links pointing outside the block (false).
     203             :   void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
     204             : 
     205             :   void nodeScheduled(SUnit *SU);
     206             :   void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
     207             :   void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
     208             :   SUnit* pickNode();
     209             :   void traceCandidate(const SISchedCandidate &Cand);
     210             :   void initRegPressure(MachineBasicBlock::iterator BeginBlock,
     211             :                        MachineBasicBlock::iterator EndBlock);
     212             : };
     213             : 
     214             : struct SIScheduleBlocks {
     215             :   std::vector<SIScheduleBlock*> Blocks;
     216             :   std::vector<int> TopDownIndex2Block;
     217             :   std::vector<int> TopDownBlock2Index;
     218             : };
     219             : 
     220             : enum SISchedulerBlockCreatorVariant {
     221             :   LatenciesAlone,
     222             :   LatenciesGrouped,
     223             :   LatenciesAlonePlusConsecutive
     224             : };
     225             : 
     226           4 : class SIScheduleBlockCreator {
     227             :   SIScheduleDAGMI *DAG;
     228             :   // unique_ptr handles freeing memory for us.
     229             :   std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
     230             :   std::map<SISchedulerBlockCreatorVariant,
     231             :            SIScheduleBlocks> Blocks;
     232             :   std::vector<SIScheduleBlock*> CurrentBlocks;
     233             :   std::vector<int> Node2CurrentBlock;
     234             : 
     235             :   // Topological sort
     236             :   // Maps topological index to the node number.
     237             :   std::vector<int> TopDownIndex2Block;
     238             :   std::vector<int> TopDownBlock2Index;
     239             :   std::vector<int> BottomUpIndex2Block;
     240             : 
     241             :   // 0 -> Color not given.
     242             :   // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
     243             :   // Above -> Other groups.
     244             :   int NextReservedID;
     245             :   int NextNonReservedID;
     246             :   std::vector<int> CurrentColoring;
     247             :   std::vector<int> CurrentTopDownReservedDependencyColoring;
     248             :   std::vector<int> CurrentBottomUpReservedDependencyColoring;
     249             : 
     250             : public:
     251             :   SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
     252             :   ~SIScheduleBlockCreator();
     253             : 
     254             :   SIScheduleBlocks
     255             :   getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
     256             : 
     257             :   bool isSUInBlock(SUnit *SU, unsigned ID);
     258             : 
     259             : private:
     260             :   // Give a Reserved color to every high latency.
     261             :   void colorHighLatenciesAlone();
     262             : 
     263             :   // Create groups of high latencies with a Reserved color.
     264             :   void colorHighLatenciesGroups();
     265             : 
     266             :   // Compute coloring for topdown and bottom traversals with
     267             :   // different colors depending on dependencies on Reserved colors.
     268             :   void colorComputeReservedDependencies();
     269             : 
     270             :   // Give color to all non-colored SUs according to Reserved groups dependencies.
     271             :   void colorAccordingToReservedDependencies();
     272             : 
     273             :   // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
     274             :   // The new colors are computed according to the dependencies on the other blocks
     275             :   // formed with colorAccordingToReservedDependencies.
     276             :   void colorEndsAccordingToDependencies();
     277             : 
     278             :   // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
     279             :   void colorForceConsecutiveOrderInGroup();
     280             : 
     281             :   // Merge Constant loads that have all their users into another group to the group.
     282             :   // (TODO: else if all their users depend on the same group, put them there)
     283             :   void colorMergeConstantLoadsNextGroup();
     284             : 
     285             :   // Merge SUs that have all their users into another group to the group
     286             :   void colorMergeIfPossibleNextGroup();
     287             : 
     288             :   // Merge SUs that have all their users into another group to the group,
     289             :   // but only for Reserved groups.
     290             :   void colorMergeIfPossibleNextGroupOnlyForReserved();
     291             : 
     292             :   // Merge SUs that have all their users into another group to the group,
     293             :   // but only if the group is no more than a few SUs.
     294             :   void colorMergeIfPossibleSmallGroupsToNextGroup();
     295             : 
     296             :   // Divides Blocks with important size.
     297             :   // Idea of implementation: attribute new colors depending on topdown and
     298             :   // bottom up links to other blocks.
     299             :   void cutHugeBlocks();
     300             : 
     301             :   // Put in one group all instructions with no users in this scheduling region
     302             :   // (we'd want these groups be at the end).
     303             :   void regroupNoUserInstructions();
     304             : 
     305             :   // Give Reserved color to export instructions
     306             :   void colorExports();
     307             : 
     308             :   void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
     309             : 
     310             :   void topologicalSort();
     311             : 
     312             :   void scheduleInsideBlocks();
     313             : 
     314             :   void fillStats();
     315             : };
     316             : 
     317             : enum SISchedulerBlockSchedulerVariant {
     318             :   BlockLatencyRegUsage,
     319             :   BlockRegUsageLatency,
     320             :   BlockRegUsage
     321             : };
     322             : 
     323             : class SIScheduleBlockScheduler {
     324             :   SIScheduleDAGMI *DAG;
     325             :   SISchedulerBlockSchedulerVariant Variant;
     326             :   std::vector<SIScheduleBlock*> Blocks;
     327             : 
     328             :   std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
     329             :   std::set<unsigned> LiveRegs;
     330             :   // Num of schedulable unscheduled blocks reading the register.
     331             :   std::map<unsigned, unsigned> LiveRegsConsumers;
     332             : 
     333             :   std::vector<unsigned> LastPosHighLatencyParentScheduled;
     334             :   int LastPosWaitedHighLatency;
     335             : 
     336             :   std::vector<SIScheduleBlock*> BlocksScheduled;
     337             :   unsigned NumBlockScheduled;
     338             :   std::vector<SIScheduleBlock*> ReadyBlocks;
     339             : 
     340             :   unsigned VregCurrentUsage;
     341             :   unsigned SregCurrentUsage;
     342             : 
     343             :   // Currently is only approximation.
     344             :   unsigned maxVregUsage;
     345             :   unsigned maxSregUsage;
     346             : 
     347             :   std::vector<unsigned> BlockNumPredsLeft;
     348             :   std::vector<unsigned> BlockNumSuccsLeft;
     349             : 
     350             : public:
     351             :   SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
     352             :                            SISchedulerBlockSchedulerVariant Variant,
     353             :                            SIScheduleBlocks BlocksStruct);
     354           4 :   ~SIScheduleBlockScheduler() = default;
     355             : 
     356           2 :   std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
     357             : 
     358           0 :   unsigned getVGPRUsage() { return maxVregUsage; }
     359           0 :   unsigned getSGPRUsage() { return maxSregUsage; }
     360             : 
     361             : private:
     362             :   struct SIBlockSchedCandidate : SISchedulerCandidate {
     363             :     // The best Block candidate.
     364             :     SIScheduleBlock *Block = nullptr;
     365             : 
     366             :     bool IsHighLatency;
     367             :     int VGPRUsageDiff;
     368             :     unsigned NumSuccessors;
     369             :     unsigned NumHighLatencySuccessors;
     370             :     unsigned LastPosHighLatParentScheduled;
     371             :     unsigned Height;
     372             : 
     373          12 :     SIBlockSchedCandidate() = default;
     374             : 
     375           0 :     bool isValid() const { return Block; }
     376             : 
     377             :     // Copy the status of another candidate without changing policy.
     378             :     void setBest(SIBlockSchedCandidate &Best) {
     379             :       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
     380           5 :       Block = Best.Block;
     381           5 :       Reason = Best.Reason;
     382           5 :       IsHighLatency = Best.IsHighLatency;
     383           5 :       VGPRUsageDiff = Best.VGPRUsageDiff;
     384           5 :       NumSuccessors = Best.NumSuccessors;
     385           5 :       NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
     386           5 :       LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
     387           5 :       Height = Best.Height;
     388             :     }
     389             :   };
     390             : 
     391             :   bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
     392             :                            SIBlockSchedCandidate &TryCand);
     393             :   bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
     394             :                             SIBlockSchedCandidate &TryCand);
     395             :   SIScheduleBlock *pickBlock();
     396             : 
     397             :   void addLiveRegs(std::set<unsigned> &Regs);
     398             :   void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
     399             :   void releaseBlockSuccs(SIScheduleBlock *Parent);
     400             :   void blockScheduled(SIScheduleBlock *Block);
     401             : 
     402             :   // Check register pressure change
     403             :   // by scheduling a block with these LiveIn and LiveOut.
     404             :   std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
     405             :                                        std::set<unsigned> &OutRegs);
     406             : 
     407             :   void schedule();
     408             : };
     409             : 
     410           6 : struct SIScheduleBlockResult {
     411             :   std::vector<unsigned> SUs;
     412             :   unsigned MaxSGPRUsage;
     413             :   unsigned MaxVGPRUsage;
     414             : };
     415             : 
     416             : class SIScheduler {
     417             :   SIScheduleDAGMI *DAG;
     418             :   SIScheduleBlockCreator BlockCreator;
     419             : 
     420             : public:
     421           2 :   SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
     422             : 
     423           2 :   ~SIScheduler() = default;
     424             : 
     425             :   struct SIScheduleBlockResult
     426             :   scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
     427             :                   SISchedulerBlockSchedulerVariant ScheduleVariant);
     428             : };
     429             : 
     430           3 : class SIScheduleDAGMI final : public ScheduleDAGMILive {
     431             :   const SIInstrInfo *SITII;
     432             :   const SIRegisterInfo *SITRI;
     433             : 
     434             :   std::vector<SUnit> SUnitsLinksBackup;
     435             : 
     436             :   // For moveLowLatencies. After all Scheduling variants are tested.
     437             :   std::vector<unsigned> ScheduledSUnits;
     438             :   std::vector<unsigned> ScheduledSUnitsInv;
     439             : 
     440             :   unsigned VGPRSetID;
     441             :   unsigned SGPRSetID;
     442             : 
     443             : public:
     444             :   SIScheduleDAGMI(MachineSchedContext *C);
     445             : 
     446             :   ~SIScheduleDAGMI() override;
     447             : 
     448             :   // Entry point for the schedule.
     449             :   void schedule() override;
     450             : 
     451             :   // To init Block's RPTracker.
     452          15 :   void initRPTracker(RegPressureTracker &RPTracker) {
     453          15 :     RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
     454          15 :   }
     455             : 
     456           0 :   MachineBasicBlock *getBB() { return BB; }
     457           0 :   MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
     458           0 :   MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
     459           0 :   LiveIntervals *getLIS() { return LIS; }
     460           0 :   MachineRegisterInfo *getMRI() { return &MRI; }
     461           0 :   const TargetRegisterInfo *getTRI() { return TRI; }
     462           0 :   ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
     463             :   SUnit& getEntrySU() { return EntrySU; }
     464             :   SUnit& getExitSU() { return ExitSU; }
     465             : 
     466             :   void restoreSULinksLeft();
     467             : 
     468             :   template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
     469             :                                                      _Iterator End,
     470             :                                                      unsigned &VgprUsage,
     471             :                                                      unsigned &SgprUsage);
     472             : 
     473           2 :   std::set<unsigned> getInRegs() {
     474             :     std::set<unsigned> InRegs;
     475          13 :     for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
     476          11 :       InRegs.insert(RegMaskPair.RegUnit);
     477             :     }
     478           2 :     return InRegs;
     479             :   }
     480             : 
     481           2 :   std::set<unsigned> getOutRegs() {
     482             :     std::set<unsigned> OutRegs;
     483           5 :     for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
     484           3 :       OutRegs.insert(RegMaskPair.RegUnit);
     485             :     }
     486           2 :     return OutRegs;
     487             :   };
     488             : 
     489           0 :   unsigned getVGPRSetID() const { return VGPRSetID; }
     490           0 :   unsigned getSGPRSetID() const { return SGPRSetID; }
     491             : 
     492             : private:
     493             :   void topologicalSort();
     494             :   // After scheduling is done, improve low latency placements.
     495             :   void moveLowLatencies();
     496             : 
     497             : public:
     498             :   // Some stats for scheduling inside blocks.
     499             :   std::vector<unsigned> IsLowLatencySU;
     500             :   std::vector<unsigned> LowLatencyOffset;
     501             :   std::vector<unsigned> IsHighLatencySU;
     502             :   // Topological sort
     503             :   // Maps topological index to the node number.
     504             :   std::vector<int> TopDownIndex2SU;
     505             :   std::vector<int> BottomUpIndex2SU;
     506             : };
     507             : 
     508             : } // end namespace llvm
     509             : 
     510             : #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H

Generated by: LCOV version 1.13