LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - GCNHazardRecognizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 248 251 98.8 %
Date: 2018-10-20 13:21:21 Functions: 28 29 96.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- GCNHazardRecognizer.cpp - GCN Hazard Recognizer Impls -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements hazard recognizers for scheduling on GCN processors.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "GCNHazardRecognizer.h"
      15             : #include "AMDGPUSubtarget.h"
      16             : #include "SIDefines.h"
      17             : #include "SIInstrInfo.h"
      18             : #include "SIRegisterInfo.h"
      19             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      20             : #include "Utils/AMDGPUBaseInfo.h"
      21             : #include "llvm/ADT/iterator_range.h"
      22             : #include "llvm/CodeGen/MachineFunction.h"
      23             : #include "llvm/CodeGen/MachineInstr.h"
      24             : #include "llvm/CodeGen/MachineOperand.h"
      25             : #include "llvm/CodeGen/ScheduleDAG.h"
      26             : #include "llvm/MC/MCInstrDesc.h"
      27             : #include "llvm/Support/ErrorHandling.h"
      28             : #include <algorithm>
      29             : #include <cassert>
      30             : #include <limits>
      31             : #include <set>
      32             : #include <vector>
      33             : 
      34             : using namespace llvm;
      35             : 
      36             : //===----------------------------------------------------------------------===//
      37             : // Hazard Recognizer Implementation
      38             : //===----------------------------------------------------------------------===//
      39             : // Builds the recognizer for MF: caches the subtarget and its instr/register info, and sizes both clause bit vectors by register-unit count.
      40       35754 : GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
      41             :   CurrCycleInstr(nullptr),
      42             :   MF(MF),
      43       35754 :   ST(MF.getSubtarget<GCNSubtarget>()),
      44       35754 :   TII(*ST.getInstrInfo()),
      45             :   TRI(TII.getRegisterInfo()),
      46             :   ClauseUses(TRI.getNumRegUnits()),
      47       71508 :   ClauseDefs(TRI.getNumRegUnits()) {
      48       35754 :   MaxLookAhead = 5; // Equals the largest wait-state constant among the checks visible in this file (VMEM SGPR, DPP EXEC).
      49       35754 : }
      50             : // SUnit overload: forwards the unit's MachineInstr to EmitInstruction(MachineInstr *).
      51      216559 : void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
      52      216559 :   EmitInstruction(SU->getInstr());
      53      216559 : }
      54             : // Records MI as the instruction issued in the current cycle; AdvanceCycle() later moves it into EmittedInstrs.
      55      576322 : void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
      56      576322 :   CurrCycleInstr = MI;
      57      576322 : }
      58             : // True if Opcode is either V_DIV_FMAS variant (F32 or F64).
      59             : static bool isDivFMas(unsigned Opcode) {
      60      744722 :   return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
      61             : }
      62             : // True if Opcode is S_GETREG_B32.
      63             : static bool isSGetReg(unsigned Opcode) {
      64             :   return Opcode == AMDGPU::S_GETREG_B32;
      65             : }
      66             : // True if Opcode is S_SETREG_B32 or S_SETREG_IMM32_B32.
      67             : static bool isSSetReg(unsigned Opcode) {
      68      743278 :   return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
      69             : }
      70             : // True if Opcode is V_READLANE_B32 or V_WRITELANE_B32.
      71             : static bool isRWLane(unsigned Opcode) {
      72      744703 :   return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
      73             : }
      74             : // True if Opcode is S_RFE_B64.
      75             : static bool isRFE(unsigned Opcode) {
      76             :   return Opcode == AMDGPU::S_RFE_B64;
      77             : }
      78             : // True if Opcode is one of the four S_MOVRELS/S_MOVRELD opcodes (B32 or B64).
      79             : static bool isSMovRel(unsigned Opcode) {
      80      131836 :   switch (Opcode) {
      81             :   case AMDGPU::S_MOVRELS_B32:
      82             :   case AMDGPU::S_MOVRELS_B64:
      83             :   case AMDGPU::S_MOVRELD_B32:
      84             :   case AMDGPU::S_MOVRELD_B64:
      85             :     return true;
      86             :   default:
      87             :     return false;
      88             :   }
      89             : }
      90             : // True for S_SENDMSG, S_SENDMSGHALT and S_TTRACEDATA; despite the name, GDS instructions are not matched yet.
      91             : static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
      92      148286 :   switch (MI.getOpcode()) {
      93             :   case AMDGPU::S_SENDMSG:
      94             :   case AMDGPU::S_SENDMSGHALT:
      95             :   case AMDGPU::S_TTRACEDATA:
      96             :     return true;
      97             :   default:
      98             :     // TODO: GDS
      99             :     return false;
     100             :   }
     101             : }
     102             : // Returns the hardware-register id encoded in RegInstr's simm16 operand (immediate masked with Hwreg::ID_MASK_).
     103             : static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
     104             :   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
     105             :                                                      AMDGPU::OpName::simm16);
     106         270 :   return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; // RegOp is not null-checked; callers in this file only pass s_getreg/s_setreg instructions.
     107             : }
     108             : // Returns NoopHazard if any check that applies to SU's instruction reports a positive wait-state count, else NoHazard. Stalls is not consulted.
     109             : ScheduleHazardRecognizer::HazardType
     110      219018 : GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
     111      219018 :   MachineInstr *MI = SU->getInstr();
     112             : 
     113      219018 :   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
     114             :     return NoopHazard;
     115             : 
     116             :   // FIXME: Should flat be considered vmem?
     117      196716 :   if ((SIInstrInfo::isVMEM(*MI) ||
     118             :        SIInstrInfo::isFLAT(*MI))
     119       34939 :       && checkVMEMHazards(MI) > 0)
     120             :     return NoopHazard;
     121             : 
     122      218480 :   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
     123             :     return NoopHazard;
     124             : 
     125      216885 :   if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
     126             :     return NoopHazard;
     127             : 
     128      433218 :   if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
     129             :     return NoopHazard;
     130             : 
     131      433180 :   if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
     132             :     return NoopHazard;
     133             : 
     134      433156 :   if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
     135             :     return NoopHazard;
     136             : 
     137      433156 :   if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
     138             :     return NoopHazard;
     139             : 
     140      433156 :   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
     141             :     return NoopHazard;
     142             : 
     143      216578 :   if (ST.hasReadM0MovRelInterpHazard() && // Both M0-read hazards below are gated on a subtarget query.
     144      216593 :       (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
     145          15 :       checkReadM0Hazards(MI) > 0)
     146             :     return NoopHazard;
     147             : 
     148      433174 :   if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
     149          28 :       checkReadM0Hazards(MI) > 0)
     150             :     return NoopHazard;
     151             : 
     152      216559 :   if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
     153             :     return NoopHazard;
     154             : 
     155      216559 :   if (checkAnyInstHazards(MI) > 0) // Applied to every instruction, whatever its category.
     156           0 :     return NoopHazard;
     157             : 
     158             :   return NoHazard;
     159             : }
     160             : // SUnit overload: forwards the unit's MachineInstr to PreEmitNoops(MachineInstr *).
     161      216559 : unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
     162      216559 :   return PreEmitNoops(SU->getInstr());
     163             : }
     164             : // Returns the number of wait states needed before MI: the maximum over every hazard check that applies to it.
     165      576322 : unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
     166      576324 :   int WaitStates = std::max(0, checkAnyInstHazards(MI)); // Start at 0 so a negative result from any check never wins the max.
     167             : 
     168      576322 :   if (SIInstrInfo::isSMRD(*MI))
     169       48220 :     return std::max(WaitStates, checkSMRDHazards(MI)); // SMRD returns here; none of the checks below are run for it.
     170             : 
     171      528113 :   if (SIInstrInfo::isVALU(*MI)) // The next five checks accumulate: an instruction may match several of them.
     172      238922 :     WaitStates = std::max(WaitStates, checkVALUHazards(MI));
     173             : 
     174      469164 :   if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
     175       85258 :     WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
     176             : 
     177      528113 :   if (SIInstrInfo::isDPP(*MI))
     178         329 :     WaitStates = std::max(WaitStates, checkDPPHazards(MI));
     179             : 
     180     1056226 :   if (isDivFMas(MI->getOpcode()))
     181         280 :     WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
     182             : 
     183     1056226 :   if (isRWLane(MI->getOpcode()))
     184         110 :     WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
     185             : 
     186      528113 :   if (MI->isInlineAsm()) // From here on, the first matching category returns.
     187        2049 :     return std::max(WaitStates, checkInlineAsmHazards(MI));
     188             : 
     189      526065 :   if (isSGetReg(MI->getOpcode()))
     190          56 :     return std::max(WaitStates, checkGetRegHazards(MI));
     191             : 
     192      526021 :   if (isSSetReg(MI->getOpcode()))
     193         152 :     return std::max(WaitStates, checkSetRegHazards(MI));
     194             : 
     195      525877 :   if (isRFE(MI->getOpcode()))
     196          10 :     return std::max(WaitStates, checkRFEHazards(MI));
     197             : 
     198     1051738 :   if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
     199             :                                            isSMovRel(MI->getOpcode())))
     200          41 :     return std::max(WaitStates, checkReadM0Hazards(MI));
     201             : 
     202      525838 :   if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
     203          58 :     return std::max(WaitStates, checkReadM0Hazards(MI));
     204             : 
     205      525795 :   return WaitStates;
     206             : }
     207             : // Records an emitted no-op as a nullptr entry at the front (most recent end) of EmittedInstrs.
     208        1774 : void GCNHazardRecognizer::EmitNoop() {
     209        1774 :   EmittedInstrs.push_front(nullptr);
     210        1774 : }
     211             : // Moves the instruction recorded by EmitInstruction() into EmittedInstrs, pads for its extra wait states, and truncates the history.
     212     1099305 : void GCNHazardRecognizer::AdvanceCycle() {
     213             :   // When the scheduler detects a stall, it will call AdvanceCycle() without
     214             :   // emitting any instructions.
     215     1099305 :   if (!CurrCycleInstr)
     216      525544 :     return;
     217             : 
     218             :   // Do not track non-instructions which do not affect the wait states.
     219             :   // If included, these instructions can lead to buffer overflow such that
     220             :   // detectable hazards are missed.
     221     1152644 :   if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
     222             :     return; // NOTE(review): CurrCycleInstr stays set on this path and the next; only the full path below clears it.
     223             :   else if (CurrCycleInstr->isDebugInstr())
     224             :     return;
     225             : 
     226      573761 :   unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
     227             : 
     228             :   // Keep track of emitted instructions
     229      573761 :   EmittedInstrs.push_front(CurrCycleInstr);
     230             : 
     231             :   // Add a nullptr for each additional wait state after the first.  Make sure
     232             :   // not to add more than getMaxLookAhead() items to the list, since we
     233             :   // truncate the list to that size right after this loop.
     234      573761 :   for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
     235      573974 :        i < e; ++i) {
     236         213 :     EmittedInstrs.push_front(nullptr);
     237             :   }
     238             : 
     239             :   // getMaxLookahead() is the largest number of wait states we will ever need
     240             :   // to insert, so there is no point in keeping track of more than that many
     241             :   // wait states.
     242      573761 :   EmittedInstrs.resize(getMaxLookAhead());
     243             : 
     244      573761 :   CurrCycleInstr = nullptr; // Ready for the next EmitInstruction(); a null value marks a stall cycle above.
     245             : }
     246             : // Bottom-up scheduling hook; not implemented by this recognizer, so reaching it is a programming error.
     247           0 : void GCNHazardRecognizer::RecedeCycle() {
     248           0 :   llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
     249             : }
     250             : 
     251             : //===----------------------------------------------------------------------===//
     252             : // Helper Functions
     253             : //===----------------------------------------------------------------------===//
     254             : // Counts wait states from the most recent EmittedInstrs entry back to the first one matching IsHazard; returns INT_MAX if none matches.
     255      664243 : int GCNHazardRecognizer::getWaitStatesSince(
     256             :     function_ref<bool(MachineInstr *)> IsHazard) {
     257             :   int WaitStates = 0;
     258     3818408 :   for (MachineInstr *MI : EmittedInstrs) { // Front of the list is the most recent entry (entries are added with push_front).
     259     3166468 :     if (MI) {
     260     2834847 :       if (IsHazard(MI))
     261             :         return WaitStates;
     262             : 
     263     2822544 :       unsigned Opcode = MI->getOpcode();
     264     2822544 :       if (Opcode == AMDGPU::INLINEASM)
     265             :         continue; // Inline asm entries are not counted as a wait state.
     266             :     }
     267     3150061 :     ++WaitStates; // nullptr entries (no-ops / extra wait states) count as well.
     268             :   }
     269             :   return std::numeric_limits<int>::max();
     270             : }
     271             : // Wait states since the most recent tracked instruction that satisfies IsHazardDef and modifies Reg; INT_MAX if there is none.
     272      390816 : int GCNHazardRecognizer::getWaitStatesSinceDef(
     273             :     unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
     274      390816 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     275             : 
     276             :   auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
     277             :     return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
     278      390816 :   };
     279             : 
     280      390816 :   return getWaitStatesSince(IsHazardFn);
     281             : }
     282             : // Wait states since the most recent tracked s_setreg instruction that satisfies IsHazard; INT_MAX if there is none.
     283         204 : int GCNHazardRecognizer::getWaitStatesSinceSetReg(
     284             :     function_ref<bool(MachineInstr *)> IsHazard) {
     285             :   auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
     286        1358 :     return isSSetReg(MI->getOpcode()) && IsHazard(MI); // IsHazard only ever sees s_setreg instructions.
     287         204 :   };
     288             : 
     289         204 :   return getWaitStatesSince(IsHazardFn);
     290             : }
     291             : 
     292             : //===----------------------------------------------------------------------===//
     293             : // No-op Hazard Detection
     294             : //===----------------------------------------------------------------------===//
     295             : // Sets the bit in BV for every register unit of Reg.
     296       16633 : static void addRegUnits(const SIRegisterInfo &TRI,
     297             :                         BitVector &BV, unsigned Reg) {
     298       44019 :   for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
     299             :     BV.set(*RUI);
     300       16633 : }
     301             : // Adds the register units of every register operand in Ops to Set; non-register operands are skipped.
     302             : static void addRegsToSet(const SIRegisterInfo &TRI,
     303             :                          iterator_range<MachineInstr::const_mop_iterator> Ops,
     304             :                          BitVector &Set) {
     305       34550 :   for (const MachineOperand &Op : Ops) {
     306       24800 :     if (Op.isReg())
     307       16633 :       addRegUnits(TRI, Set, Op.getReg());
     308             :   }
     309             : }
     310             : // Folds MI into the current clause: its defs() go into ClauseDefs and its uses() into ClauseUses.
     311        4875 : void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
     312             :   // XXX: Do we need to worry about implicit operands
     313        4875 :   addRegsToSet(TRI, MI.defs(), ClauseDefs);
     314        4875 :   addRegsToSet(TRI, MI.uses(), ClauseUses);
     315        4875 : }
     316             : // Returns 1 if adding MEM to the soft clause formed by the preceding same-kind memory instructions needs a clause break, else 0.
     317      142213 : int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
     318             :   // SMEM soft clauses are only present on VI+, and only matter if xnack is
     319             :   // enabled.
     320      142213 :   if (!ST.isXNACKEnabled())
     321             :     return 0;
     322             : 
     323             :   bool IsSMRD = TII.isSMRD(*MEM);
     324             : 
     325        2534 :   resetClause();
     326             : 
     327             :   // A soft-clause is any group of consecutive SMEM instructions.  The
     328             :   // instructions in this group may return out of order and/or may be
     329             :   // replayed (i.e. the same instruction issued more than once).
     330             :   //
     331             :   // In order to handle these situations correctly we need to make sure
     332             :   // that when a clause has more than one instruction, no instruction in the
     333             :   // clause writes to a register that is read by another instruction in the
     334             :   // clause (including itself). If we encounter this situation, we need to
     335             :   // break the clause by inserting a non SMEM instruction.
     336             : 
     337        6793 :   for (MachineInstr *MI : EmittedInstrs) {
     338             :     // A nullptr entry (emitted no-op or extra wait state) means we have
     339             :     // passed the start of the clause and we can stop.
     340        5654 :     if (!MI)
     341             :       break;
     342             : // An instruction of the other kind (SMRD vs. non-SMRD) also ends the clause.
     343        4436 :     if (IsSMRD != SIInstrInfo::isSMRD(*MI))
     344             :       break;
     345             : 
     346        4259 :     addClauseInst(*MI);
     347             :   }
     348             : 
     349        2534 :   if (ClauseDefs.none()) // Nothing in the clause (if any) writes a register, so MEM cannot conflict.
     350             :     return 0;
     351             : 
     352             :   // We need to make sure not to put loads and stores in the same clause if they
     353             :   // use the same address. For now, just start a new clause whenever we see a
     354             :   // store.
     355        1011 :   if (MEM->mayStore())
     356             :     return 1;
     357             : 
     358         616 :   addClauseInst(*MEM);
     359             : 
     360             :   // If the set of defs and uses intersect then we cannot add this instruction
     361             :   // to the clause, so we have a hazard.
     362         616 :   return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
     363             : }
     364             : // Wait states needed before SMRD: the soft-clause check on all targets, plus the SGPR-written-by-VALU (and buffer-descriptor) hazards on SI.
     365       69290 : int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
     366       69290 :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); // Local ST shadows the member of the same name.
     367             :   int WaitStatesNeeded = 0;
     368             : 
     369       69290 :   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
     370             : 
     371             :   // This SMRD hazard only affects SI.
     372       69290 :   if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
     373             :     return WaitStatesNeeded;
     374             : 
     375             :   // A read of an SGPR by SMRD instruction requires 4 wait states when the
     376             :   // SGPR was written by a VALU instruction.
     377             :   int SmrdSgprWaitStates = 4;
     378       13962 :   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
     379       13962 :   auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
     380             : 
     381       13962 :   bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
     382             : 
     383       55825 :   for (const MachineOperand &Use : SMRD->uses()) {
     384       41863 :     if (!Use.isReg())
     385       27803 :       continue;
     386             :     int WaitStatesNeededForUse =
     387       14060 :         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); // Negative when the def is far enough back or absent; max() below discards it.
     388       14060 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     389             : 
     390             :     // This fixes what appears to be undocumented hardware behavior in SI where
     391             :     // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
     392             :     // needs some number of nops in between. We don't know how many we need, but
     393             :     // let's use 4. This wasn't discovered before probably because the only
     394             :     // case when this happens is when we expand a 64-bit pointer into a full
     395             :     // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
     396             :     // probably never encountered in the closed-source land.
     397       14060 :     if (IsBufferSMRD) {
     398             :       int WaitStatesNeededForUse =
     399         263 :         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
     400         263 :                                                    IsBufferHazardDefFn);
     401         263 :       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     402             :     }
     403             :   }
     404             : 
     405       13962 :   return WaitStatesNeeded;
     406             : }
     407             : // Wait states needed before VMEM: 0 on generations before VI, otherwise the max of the soft-clause check and the SGPR-written-by-VALU hazard.
     408      119830 : int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
     409      119830 :   if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
     410             :     return 0;
     411             : 
     412       72923 :   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
     413             : 
     414             :   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
     415             :   // SGPR was written by a VALU Instruction.
     416             :   const int VmemSgprWaitStates = 5;
     417       72923 :   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
     418             : 
     419      597454 :   for (const MachineOperand &Use : VMEM->uses()) {
     420      524531 :     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) // Only non-VGPR register uses are checked.
     421      371521 :       continue;
     422             : 
     423             :     int WaitStatesNeededForUse =
     424      153010 :         VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
     425      153010 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     426             :   }
     427       72923 :   return WaitStatesNeeded;
     428             : }
     429             : // Wait states needed before DPP: 2 after a write to any VGPR it reads, 5 after a VALU write to EXEC; returns the larger shortfall (never negative).
     430         736 : int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
     431         736 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     432         736 :   const SIInstrInfo *TII = ST.getInstrInfo();
     433             : 
     434             :   // Check for DPP VGPR read after VALU VGPR write and EXEC write.
     435             :   int DppVgprWaitStates = 2;
     436             :   int DppExecWaitStates = 5;
     437         736 :   int WaitStatesNeeded = 0;
     438         736 :   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
     439             : 
     440        5888 :   for (const MachineOperand &Use : DPP->uses()) {
     441        5152 :     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
     442        3680 :       continue;
     443             :     int WaitStatesNeededForUse =
     444        1472 :         DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg()); // NOTE(review): no predicate is passed, so the default declared in the header applies (presumably "any def") — confirm.
     445        1472 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     446             :   }
     447             : 
     448         736 :   WaitStatesNeeded = std::max(
     449             :       WaitStatesNeeded,
     450         736 :       DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
     451             : 
     452         736 :   return WaitStatesNeeded;
     453             : }
     454             : // Returns 4 minus the wait states since the last VALU write to VCC; the result may be negative and callers compare or clamp it.
     455         392 : int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
     456         392 :   const SIInstrInfo *TII = ST.getInstrInfo();
     457             : 
     458             :   // v_div_fmas requires 4 wait states after a write to vcc from a VALU
     459             :   // instruction.
     460             :   const int DivFMasWaitStates = 4;
     461         392 :   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
     462         392 :   int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn); // Despite the name, this holds the wait states already elapsed.
     463             : 
     464         392 :   return DivFMasWaitStates - WaitStatesNeeded;
     465             : }
     466             : // Returns 2 minus the wait states since the last s_setreg of the hardware register that GetRegInstr reads; may be negative.
     467          56 : int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
     468          56 :   const SIInstrInfo *TII = ST.getInstrInfo();
     469             :   unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
     470             : 
     471             :   const int GetRegWaitStates = 2;
     472             :   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
     473          16 :     return GetRegHWReg == getHWReg(TII, *MI); // Only invoked on s_setreg instructions by getWaitStatesSinceSetReg().
     474          56 :   };
     475          56 :   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); // Despite the name, this holds the wait states already elapsed.
     476             : 
     477          56 :   return GetRegWaitStates - WaitStatesNeeded;
     478             : }
     479             : // Returns the required gap (1 up to Sea Islands, 2 afterwards) minus the wait states since the last s_setreg of the same hardware register; may be negative.
     480         144 : int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
     481         144 :   const SIInstrInfo *TII = ST.getInstrInfo();
     482             :   unsigned HWReg = getHWReg(TII, *SetRegInstr);
     483             : 
     484             :   const int SetRegWaitStates =
     485         144 :       ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
     486             :   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
     487          48 :     return HWReg == getHWReg(TII, *MI); // Only invoked on s_setreg instructions by getWaitStatesSinceSetReg().
     488         144 :   };
     489         144 :   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); // Despite the name, this holds the wait states already elapsed.
     490         144 :   return SetRegWaitStates - WaitStatesNeeded;
     491             : }
     492             : // Returns the operand index of MI's store data if MI is a store wider than 64 bits that can create the VALU overwrite hazard, otherwise -1.
     493     1196858 : int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
     494     1196858 :   if (!MI.mayStore())
     495             :     return -1;
     496             : 
     497       89772 :   const SIInstrInfo *TII = ST.getInstrInfo();
     498       89772 :   unsigned Opcode = MI.getOpcode();
     499             :   const MCInstrDesc &Desc = MI.getDesc();
     500             : 
     501       89772 :   int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
     502             :   int VDataRCID = -1;
     503       89772 :   if (VDataIdx != -1)
     504       33830 :     VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
     505             : 
     506       89772 :   if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
     507             :     // There is no hazard if the instruction does not use vector regs
     508             :     // (like wbinvl1)
     509       23056 :     if (VDataIdx == -1)
     510             :       return -1;
     511             :     // For MUBUF/MTBUF instructions this hazard only exists if the
     512             :     // instruction is not using a register in the soffset field.
     513             :     const MachineOperand *SOffset =
     514             :         TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
     515             :     // If we have no soffset operand, then assume this field has been
     516             :     // hardcoded to zero.
     517       22944 :     if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
     518       10132 :         (!SOffset || !SOffset->isReg()))
     519             :       return VDataIdx;
     520             :   }
     521             : 
     522             :   // MIMG instructions create a hazard if they don't use a 256-bit T# and
     523             :   // the store size is greater than 8 bytes and they have more than two bits
     524             :   // of their dmask set.
     525             :   // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
     526             :   if (TII->isMIMG(MI)) {
     527             :     int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
     528             :     assert(SRsrcIdx != -1 &&
     529             :            AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
     530             :     (void)SRsrcIdx; // Keeps release builds (assert compiled out) free of an unused-variable warning.
     531             :   }
     532             : 
     533       79669 :   if (TII->isFLAT(MI)) {
     534       10885 :     int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); // Same lookup as VDataIdx above.
     535       10885 :     if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) // NOTE(review): DataIdx is not checked against -1; presumably every FLAT store has vdata — confirm.
     536        8115 :       return DataIdx;
     537             :   }
     538             : 
     539             :   return -1;
     540             : }
     541             : // Wait states (0 or 1) needed before an instruction may write Def, given the recent stores tracked in EmittedInstrs.
     542      299812 : int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
     543             :                                                 const MachineRegisterInfo &MRI) {
     544             :   // Helper to check for the hazard where VMEM instructions that store more than
     545             :   // 8 bytes can have their store data overwritten by the next instruction.
     546      299812 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     547             : 
     548             :   const int VALUWaitStates = 1;
     549      299812 :   int WaitStatesNeeded = 0;
     550             : 
     551      299812 :   if (!TRI->isVGPR(MRI, Def.getReg())) // Only VGPR defs are checked.
     552             :     return WaitStatesNeeded;
     553      273223 :   unsigned Reg = Def.getReg();
     554             :   auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
     555             :     int DataIdx = createsVALUHazard(*MI);
     556             :     return DataIdx >= 0 &&
     557             :     TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); // Hazard only if the store-data register overlaps the register being written.
     558      273223 :   };
     559             :   int WaitStatesNeededForDef =
     560      273223 :     VALUWaitStates - getWaitStatesSince(IsHazardFn);
     561      273223 :   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
     562             : 
     563      273223 :   return WaitStatesNeeded;
     564             : }
     565             : // Wait states needed before VALU: the maximum of checkVALUHazardsHelper() over its defs(), or 0 if the subtarget lacks the hazard.
     566      344629 : int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
     567             :   // This checks for the hazard where VMEM instructions that store more than
     568             :   // 8 bytes can have their store data overwritten by the next instruction.
     569      689258 :   if (!ST.has12DWordStoreHazard())
     570             :     return 0;
     571             : 
     572      281876 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
     573      281876 :   int WaitStatesNeeded = 0;
     574             : 
     575      579999 :   for (const MachineOperand &Def : VALU->defs()) {
     576      299747 :     WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
     577             :   }
     578             : 
     579      281876 :   return WaitStatesNeeded;
     580             : }
     581             : 
     582        2807 : int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
     583             :   // This checks for hazards associated with inline asm statements.
     584             :   // Since inline asms can contain just about anything, we use this
     585             :   // to call/leverage other check*Hazard routines. Note that
     586             :   // this function doesn't attempt to address all possible inline asm
     587             :   // hazards (good luck), but is a collection of what has been
     588             :   // problematic thus far.
     589             : 
     590             :   // Same has12DWordStoreHazard() subtarget gate as checkVALUHazards().
     591        5614 :   if (!ST.has12DWordStoreHazard())
     592             :     return 0;
     593             : 
     594        2251 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
     595        2251 :   int WaitStatesNeeded = 0;
     596             :   // Scan operands from InlineAsm::MIOp_FirstOperand on; only register defs are checked.
     597        7913 :   for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
     598        7913 :        I != E; ++I) {
     599        5662 :     const MachineOperand &Op = IA->getOperand(I);
     600        5662 :     if (Op.isReg() && Op.isDef()) {
     601        1690 :       WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));  // keep the worst case
     602             :     }
     603             :   }
     604             : 
     605        2251 :   return WaitStatesNeeded;
     606             : }
     607             : 
     608         145 : int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
     609         145 :   const SIInstrInfo *TII = ST.getInstrInfo();
     610         145 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     611         145 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
     612             :   // The lane select is the operand named src1 on RWLane.
     613             :   const MachineOperand *LaneSelectOp =
     614         145 :       TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
     615             :   // No wait states are reported unless the lane select is an SGPR register operand.
     616         145 :   if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
     617          69 :     return 0;
     618             : 
     619          76 :   unsigned LaneSelectReg = LaneSelectOp->getReg();
     620             :   auto IsHazardFn = [TII] (MachineInstr *MI) {  // predicate: MI is a VALU instruction
     621             :     return TII->isVALU(*MI);
     622          76 :   };
     623             :   // NOTE(review): presumably counts wait states since a matching def of LaneSelectReg - confirm.
     624             :   const int RWLaneWaitStates = 4;
     625          76 :   int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
     626          76 :   return RWLaneWaitStates - WaitStatesSince;  // not clamped here, so the result may be <= 0
     627             : }
     628             : 
     629           8 : int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
     630           8 :   if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
     631             :     return 0;
     632             :   // Past this point the subtarget generation is VOLCANIC_ISLANDS or later.
     633           4 :   const SIInstrInfo *TII = ST.getInstrInfo();
     634             : 
     635             :   const int RFEWaitStates = 1;
     636             :   // Predicate: an instruction for which getHWReg() yields ID_TRAPSTS.
     637             :   auto IsHazardFn = [TII] (MachineInstr *MI) {
     638           6 :     return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
     639           4 :   };
     640           4 :   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);  // NOTE(review): despite the name, this holds the helper's return value (presumably states already elapsed) - confirm
     641           4 :   return RFEWaitStates - WaitStatesNeeded;  // not clamped here, so the result may be <= 0
     642             : }
     643             : 
     644      792881 : int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
     645             :   if (MI->isDebugInstr())
     646             :     return 0;
     647             :   // Debug instructions were filtered out above and report 0 wait states.
     648      792816 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     649     1585632 :   if (!ST.hasSMovFedHazard())
     650             :     return 0;
     651             : 
     652             :   // Check for any instruction reading an SGPR after a write from
     653             :   // s_mov_fed_b32.
     654             :   int MovFedWaitStates = 1;
     655      141523 :   int WaitStatesNeeded = 0;
     656             :   // Non-register uses and VGPR uses are skipped; every other register use is checked.
     657      661579 :   for (const MachineOperand &Use : MI->uses()) {
     658      520056 :     if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
     659      299366 :       continue;
     660             :     auto IsHazardFn = [] (MachineInstr *MI) {  // this MI is the lambda's own parameter and shadows the outer MI
     661     1805224 :       return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
     662             :     };
     663             :     int WaitStatesNeededForUse =
     664      220690 :         MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);  // NOTE(review): presumably wait states since a matching def of this register - confirm
     665      220690 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);  // keep the worst case; never below the initial 0
     666             :   }
     667             : 
     668      141523 :   return WaitStatesNeeded;
     669             : }
     670             : 
     671         117 : int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
     672         117 :   const SIInstrInfo *TII = ST.getInstrInfo();
     673             :   const int SMovRelWaitStates = 1;  // value the elapsed wait states are subtracted from
     674             :   auto IsHazardFn = [TII] (MachineInstr *MI) {  // predicate: MI is an SALU instruction; the outer MI is not read in this body
     675             :     return TII->isSALU(*MI);
     676         117 :   };
     677         117 :   return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);  // keyed on AMDGPU::M0; not clamped, so the result may be <= 0
     678             : }

Generated by: LCOV version 1.13