LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - GCNHazardRecognizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 247 250 98.8 %
Date: 2018-02-25 19:55:18 Functions: 30 31 96.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- GCNHazardRecognizer.cpp - GCN Hazard Recognizer Impls -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements hazard recognizers for scheduling on GCN processors.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "GCNHazardRecognizer.h"
      15             : #include "AMDGPUSubtarget.h"
      16             : #include "SIDefines.h"
      17             : #include "SIInstrInfo.h"
      18             : #include "SIRegisterInfo.h"
      19             : #include "Utils/AMDGPUBaseInfo.h"
      20             : #include "llvm/ADT/iterator_range.h"
      21             : #include "llvm/CodeGen/MachineFunction.h"
      22             : #include "llvm/CodeGen/MachineInstr.h"
      23             : #include "llvm/CodeGen/MachineOperand.h"
      24             : #include "llvm/CodeGen/ScheduleDAG.h"
      25             : #include "llvm/MC/MCInstrDesc.h"
      26             : #include "llvm/Support/ErrorHandling.h"
      27             : #include <algorithm>
      28             : #include <cassert>
      29             : #include <limits>
      30             : #include <set>
      31             : #include <vector>
      32             : 
      33             : using namespace llvm;
      34             : 
      35             : //===----------------------------------------------------------------------===//
      36             : // Hazard Recognizer Implementation
      37             : //===----------------------------------------------------------------------===//
      38             : 
      39       29716 : GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
      40             :   CurrCycleInstr(nullptr),
      41             :   MF(MF),
      42       29716 :   ST(MF.getSubtarget<SISubtarget>()),
      43             :   TII(*ST.getInstrInfo()),
      44             :   TRI(TII.getRegisterInfo()),
      45             :   ClauseUses(TRI.getNumRegUnits()),
      46      148580 :   ClauseDefs(TRI.getNumRegUnits()) {
      47       29716 :   MaxLookAhead = 5;
      48       29716 : }
      49             : 
      50      186795 : void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
      51      186795 :   EmitInstruction(SU->getInstr());
      52      186795 : }
      53             : 
      54      467842 : void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
      55      467842 :   CurrCycleInstr = MI;
      56      467842 : }
      57             : 
      58             : static bool isDivFMas(unsigned Opcode) {
      59      599107 :   return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
      60             : }
      61             : 
      62             : static bool isSGetReg(unsigned Opcode) {
      63             :   return Opcode == AMDGPU::S_GETREG_B32;
      64             : }
      65             : 
      66             : static bool isSSetReg(unsigned Opcode) {
      67      597869 :   return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
      68             : }
      69             : 
      70             : static bool isRWLane(unsigned Opcode) {
      71      599091 :   return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
      72             : }
      73             : 
      74             : static bool isRFE(unsigned Opcode) {
      75             :   return Opcode == AMDGPU::S_RFE_B64;
      76             : }
      77             : 
      78             : static bool isSMovRel(unsigned Opcode) {
      79             :   switch (Opcode) {
      80             :   case AMDGPU::S_MOVRELS_B32:
      81             :   case AMDGPU::S_MOVRELS_B64:
      82             :   case AMDGPU::S_MOVRELD_B32:
      83             :   case AMDGPU::S_MOVRELD_B64:
      84             :     return true;
      85             :   default:
      86             :     return false;
      87             :   }
      88             : }
      89             : 
      90             : static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
      91      124569 :   switch (MI.getOpcode()) {
      92             :   case AMDGPU::S_SENDMSG:
      93             :   case AMDGPU::S_SENDMSGHALT:
      94             :   case AMDGPU::S_TTRACEDATA:
      95             :     return true;
      96             :   default:
      97             :     // TODO: GDS
      98             :     return false;
      99             :   }
     100             : }
     101             : 
     102             : static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
     103             :   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
     104             :                                                      AMDGPU::OpName::simm16);
     105         264 :   return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
     106             : }
     107             : 
     108             : ScheduleHazardRecognizer::HazardType
     109      188477 : GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
     110      188477 :   MachineInstr *MI = SU->getInstr();
     111             : 
     112      188477 :   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
     113             :     return NoopHazard;
     114             : 
     115             :   // FIXME: Should flat be considered vmem?
     116      166018 :   if ((SIInstrInfo::isVMEM(*MI) ||
     117             :        SIInstrInfo::isFLAT(*MI))
     118       32036 :       && checkVMEMHazards(MI) > 0)
     119             :     return NoopHazard;
     120             : 
     121      188241 :   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
     122             :     return NoopHazard;
     123             : 
     124      186845 :   if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
     125             :     return NoopHazard;
     126             : 
     127      373662 :   if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
     128             :     return NoopHazard;
     129             : 
     130      373630 :   if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
     131             :     return NoopHazard;
     132             : 
     133      373622 :   if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
     134             :     return NoopHazard;
     135             : 
     136      373622 :   if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
     137             :     return NoopHazard;
     138             : 
     139      373622 :   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
     140             :     return NoopHazard;
     141             : 
     142      218037 :   if (ST.hasReadM0MovRelInterpHazard() &&
     143      186824 :       (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
     144          13 :       checkReadM0Hazards(MI) > 0)
     145             :     return NoopHazard;
     146             : 
     147      373638 :   if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
     148          24 :       checkReadM0Hazards(MI) > 0)
     149             :     return NoopHazard;
     150             : 
     151      186795 :   if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
     152             :     return NoopHazard;
     153             : 
     154      186795 :   if (checkAnyInstHazards(MI) > 0)
     155             :     return NoopHazard;
     156             : 
     157      186795 :   return NoHazard;
     158             : }
     159             : 
     160      186795 : unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
     161      186795 :   return PreEmitNoops(SU->getInstr());
     162             : }
     163             : 
     164      467842 : unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
     165      935684 :   int WaitStates = std::max(0, checkAnyInstHazards(MI));
     166             : 
     167      467842 :   if (SIInstrInfo::isSMRD(*MI))
     168      111132 :     return std::max(WaitStates, checkSMRDHazards(MI));
     169             : 
     170      412276 :   if (SIInstrInfo::isVALU(*MI))
     171      404298 :     WaitStates = std::max(WaitStates, checkVALUHazards(MI));
     172             : 
     173      353940 :   if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
     174      157530 :     WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
     175             : 
     176      412276 :   if (SIInstrInfo::isDPP(*MI))
     177          58 :     WaitStates = std::max(WaitStates, checkDPPHazards(MI));
     178             : 
     179      824552 :   if (isDivFMas(MI->getOpcode()))
     180         478 :     WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
     181             : 
     182      824552 :   if (isRWLane(MI->getOpcode()))
     183          52 :     WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
     184             : 
     185      412276 :   if (MI->isInlineAsm())
     186        3660 :     return std::max(WaitStates, checkInlineAsmHazards(MI));
     187             : 
     188      410446 :   if (isSGetReg(MI->getOpcode()))
     189          88 :     return std::max(WaitStates, checkGetRegHazards(MI));
     190             : 
     191      410402 :   if (isSSetReg(MI->getOpcode()))
     192         276 :     return std::max(WaitStates, checkSetRegHazards(MI));
     193             : 
     194      410264 :   if (isRFE(MI->getOpcode()))
     195          16 :     return std::max(WaitStates, checkRFEHazards(MI));
     196             : 
     197      876959 :   if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
     198             :                                            isSMovRel(MI->getOpcode())))
     199          56 :     return std::max(WaitStates, checkReadM0Hazards(MI));
     200             : 
     201      410228 :   if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
     202          60 :     return std::max(WaitStates, checkReadM0Hazards(MI));
     203             : 
     204      410198 :   return WaitStates;
     205             : }
     206             : 
     207         872 : void GCNHazardRecognizer::EmitNoop() {
     208        1744 :   EmittedInstrs.push_front(nullptr);
     209         872 : }
     210             : 
     211      952617 : void GCNHazardRecognizer::AdvanceCycle() {
     212             :   // When the scheduler detects a stall, it will call AdvanceCycle() without
     213             :   // emitting any instructions.
     214      952617 :   if (!CurrCycleInstr)
     215      484775 :     return;
     216             : 
     217      467842 :   unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
     218             : 
     219             :   // Keep track of emitted instructions
     220      467842 :   EmittedInstrs.push_front(CurrCycleInstr);
     221             : 
     222             :   // Add a nullptr for each additional wait state after the first.  Make sure
     223             :   // not to add more than getMaxLookAhead() items to the list, since we
     224             :   // truncate the list to that size right after this loop.
     225      935684 :   for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
     226      467842 :        i < e; ++i) {
     227           0 :     EmittedInstrs.push_front(nullptr);
     228             :   }
     229             : 
     230             :   // getMaxLookAhead() is the largest number of wait states we will ever need
     231             :   // to insert, so there is no point in keeping track of more than that many
     232             :   // wait states.
     233      467842 :   EmittedInstrs.resize(getMaxLookAhead());
     234             : 
     235      467842 :   CurrCycleInstr = nullptr;
     236             : }
     237             : 
     238           0 : void GCNHazardRecognizer::RecedeCycle() {
     239           0 :   llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
     240             : }
     241             : 
     242             : //===----------------------------------------------------------------------===//
     243             : // Helper Functions
     244             : //===----------------------------------------------------------------------===//
     245             : 
     246      538118 : int GCNHazardRecognizer::getWaitStatesSince(
     247             :     function_ref<bool(MachineInstr *)> IsHazard) {
     248             :   int WaitStates = 0;
     249     3641181 :   for (MachineInstr *MI : EmittedInstrs) {
     250     2575208 :     if (MI) {
     251     2338643 :       if (IsHazard(MI))
     252             :         return WaitStates;
     253             : 
     254     2328380 :       unsigned Opcode = MI->getOpcode();
     255     2328380 :       if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF ||
     256             :           Opcode == AMDGPU::INLINEASM)
     257        4335 :         continue;
     258             :     }
     259     2560610 :     ++WaitStates;
     260             :   }
     261             :   return std::numeric_limits<int>::max();
     262             : }
     263             : 
     264      319825 : int GCNHazardRecognizer::getWaitStatesSinceDef(
     265             :     unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
     266      319825 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     267             : 
     268     1615829 :   auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
     269     1615829 :     return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
     270     1679385 :   };
     271             : 
     272      319825 :   return getWaitStatesSince(IsHazardFn);
     273             : }
     274             : 
     275         198 : int GCNHazardRecognizer::getWaitStatesSinceSetReg(
     276             :     function_ref<bool(MachineInstr *)> IsHazard) {
     277             :   auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
     278        1382 :     return isSSetReg(MI->getOpcode()) && IsHazard(MI);
     279         198 :   };
     280             : 
     281         198 :   return getWaitStatesSince(IsHazardFn);
     282             : }
     283             : 
     284             : //===----------------------------------------------------------------------===//
     285             : // No-op Hazard Detection
     286             : //===----------------------------------------------------------------------===//
     287             : 
     288        5936 : static void addRegUnits(const SIRegisterInfo &TRI,
     289             :                         BitVector &BV, unsigned Reg) {
     290       17382 :   for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
     291             :     BV.set(*RUI);
     292        5936 : }
     293             : 
     294             : static void addRegsToSet(const SIRegisterInfo &TRI,
     295             :                          iterator_range<MachineInstr::const_mop_iterator> Ops,
     296             :                          BitVector &Set) {
     297       22510 :   for (const MachineOperand &Op : Ops) {
     298        9505 :     if (Op.isReg())
     299        5936 :       addRegUnits(TRI, Set, Op.getReg());
     300             :   }
     301             : }
     302             : 
     303        1750 : void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
     304             :   // XXX: Do we need to worry about implicit operands?
     305        3500 :   addRegsToSet(TRI, MI.defs(), ClauseDefs);
     306        3500 :   addRegsToSet(TRI, MI.uses(), ClauseUses);
     307        1750 : }
     308             : 
     309      144802 : int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
     310             :   // SMEM soft clauses are only present on VI+, and only matter if xnack is
     311             :   // enabled.
     312      144802 :   if (!ST.isXNACKEnabled())
     313             :     return 0;
     314             : 
     315             :   bool IsSMRD = TII.isSMRD(*MEM);
     316             : 
     317         943 :   resetClause();
     318             : 
     319             :   // A soft-clause is any group of consecutive SMEM instructions.  The
     320             :   // instructions in this group may return out of order and/or may be
     321             :   // replayed (i.e. the same instruction issued more than once).
     322             :   //
     323             :   // In order to handle these situations correctly we need to make sure
     324             :   // that when a clause has more than one instruction, no instruction in the
     325             :   // clause writes to a register that is read by another instruction in the
     326             :   // clause (including itself). If we encounter this situation, we need to
     327             :   // break the clause by inserting a non-SMEM instruction.
     328             : 
     329        3382 :   for (MachineInstr *MI : EmittedInstrs) {
     330             :     // When we hit a non-SMEM instruction then we have passed the start of the
     331             :     // clause and we can stop.
     332        2095 :     if (!MI)
     333             :       break;
     334             : 
     335        1674 :     if (IsSMRD != SIInstrInfo::isSMRD(*MI))
     336             :       break;
     337             : 
     338        1496 :     addClauseInst(*MI);
     339             :   }
     340             : 
     341         943 :   if (ClauseDefs.none())
     342             :     return 0;
     343             : 
     344             :   // We need to make sure not to put loads and stores in the same clause if they
     345             :   // use the same address. For now, just start a new clause whenever we see a
     346             :   // store.
     347         365 :   if (MEM->mayStore())
     348             :     return 1;
     349             : 
     350         254 :   addClauseInst(*MEM);
     351             : 
     352             :   // If the set of defs and uses intersect then we cannot add this instruction
     353             :   // to the clause, so we have a hazard.
     354         254 :   return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
     355             : }
     356             : 
     357       79523 : int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
     358       79523 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     359             :   int WaitStatesNeeded = 0;
     360             : 
     361       79523 :   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
     362             : 
     363             :   // This SMRD hazard only affects SI.
     364       79523 :   if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
     365             :     return WaitStatesNeeded;
     366             : 
     367             :   // A read of an SGPR by SMRD instruction requires 4 wait states when the
     368             :   // SGPR was written by a VALU instruction.
     369             :   int SmrdSgprWaitStates = 4;
     370       20087 :   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
     371       20087 :   auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
     372             : 
     373       20087 :   bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
     374             : 
     375      140557 :   for (const MachineOperand &Use : SMRD->uses()) {
     376       60235 :     if (!Use.isReg())
     377       40095 :       continue;
     378             :     int WaitStatesNeededForUse =
     379       20140 :         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
     380       20140 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     381             : 
     382             :     // This fixes what appears to be undocumented hardware behavior in SI where
     383             :     // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
     384             :     // needs some number of nops in between. We don't know how many we need, but
     385             :     // let's use 4. This wasn't discovered before probably because the only
     386             :     // case when this happens is when we expand a 64-bit pointer into a full
     387             :     // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
     388             :     // probably never encountered in the closed-source land.
     389       20140 :     if (IsBufferSMRD) {
     390             :       int WaitStatesNeededForUse =
     391         374 :         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
     392         187 :                                                    IsBufferHazardDefFn);
     393         187 :       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     394             :     }
     395             :   }
     396             : 
     397       20087 :   return WaitStatesNeeded;
     398             : }
     399             : 
     400      110801 : int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
     401      110801 :   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
     402             :     return 0;
     403             : 
     404       65279 :   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
     405             : 
     406             :   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
     407             :   // SGPR was written by a VALU Instruction.
     408             :   const int VmemSgprWaitStates = 5;
     409       65279 :   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
     410             : 
     411     1015375 :   for (const MachineOperand &Use : VMEM->uses()) {
     412      475048 :     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
     413      334673 :       continue;
     414             : 
     415             :     int WaitStatesNeededForUse =
     416      140375 :         VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
     417      140375 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     418             :   }
     419       65279 :   return WaitStatesNeeded;
     420             : }
     421             : 
     422          50 : int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
     423          50 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     424             :   const SIInstrInfo *TII = ST.getInstrInfo();
     425             : 
     426             :   // Check for DPP VGPR read after VALU VGPR write and EXEC write.
     427             :   int DppVgprWaitStates = 2;
     428             :   int DppExecWaitStates = 5;
     429          50 :   int WaitStatesNeeded = 0;
     430          50 :   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
     431             : 
     432         750 :   for (const MachineOperand &Use : DPP->uses()) {
     433         350 :     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
     434         250 :       continue;
     435             :     int WaitStatesNeededForUse =
     436         100 :         DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
     437         100 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     438             :   }
     439             : 
     440          50 :   WaitStatesNeeded = std::max(
     441             :       WaitStatesNeeded,
     442         100 :       DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
     443             : 
     444          50 :   return WaitStatesNeeded;
     445             : }
     446             : 
     447         353 : int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
     448         353 :   const SIInstrInfo *TII = ST.getInstrInfo();
     449             : 
     450             :   // v_div_fmas requires 4 wait states after a write to vcc from a VALU
     451             :   // instruction.
     452             :   const int DivFMasWaitStates = 4;
     453         353 :   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
     454         353 :   int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
     455             : 
     456         353 :   return DivFMasWaitStates - WaitStatesNeeded;
     457             : }
     458             : 
     459          56 : int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
     460          56 :   const SIInstrInfo *TII = ST.getInstrInfo();
     461             :   unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
     462             : 
     463             :   const int GetRegWaitStates = 2;
     464             :   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
     465             :     return GetRegHWReg == getHWReg(TII, *MI);
     466          72 :   };
     467          56 :   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
     468             : 
     469          56 :   return GetRegWaitStates - WaitStatesNeeded;
     470             : }
     471             : 
     472         138 : int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
     473         138 :   const SIInstrInfo *TII = ST.getInstrInfo();
     474             :   unsigned HWReg = getHWReg(TII, *SetRegInstr);
     475             : 
     476             :   const int SetRegWaitStates =
     477         138 :       ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
     478             :   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
     479             :     return HWReg == getHWReg(TII, *MI);
     480         186 :   };
     481         138 :   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
     482         138 :   return SetRegWaitStates - WaitStatesNeeded;
     483             : }
     484             : 
     485      978427 : int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
     486      978427 :   if (!MI.mayStore())
     487             :     return -1;
     488             : 
     489       40719 :   const SIInstrInfo *TII = ST.getInstrInfo();
     490       40719 :   unsigned Opcode = MI.getOpcode();
     491             :   const MCInstrDesc &Desc = MI.getDesc();
     492             : 
     493       40719 :   int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
     494             :   int VDataRCID = -1;
     495       40719 :   if (VDataIdx != -1)
     496       31311 :     VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
     497             : 
     498       59727 :   if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
     499             :     // There is no hazard if the instruction does not use vector regs
     500             :     // (like wbinvl1)
     501       21711 :     if (VDataIdx == -1)
     502             :       return -1;
     503             :     // For MUBUF/MTBUF instructions this hazard only exists if the
     504             :     // instruction is not using a register in the soffset field.
     505             :     const MachineOperand *SOffset =
     506             :         TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
     507             :     // If we have no soffset operand, then assume this field has been
     508             :     // hardcoded to zero.
     509       21705 :     if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
     510       10117 :         (!SOffset || !SOffset->isReg()))
     511             :       return VDataIdx;
     512             :   }
     513             : 
     514             :   // MIMG instructions create a hazard if they don't use a 256-bit T# and
     515             :   // the store size is greater than 8 bytes and they have more than two bits
     516             :   // of their dmask set.
     517             :   // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
     518             :   if (TII->isMIMG(MI)) {
     519             :     int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
     520             :     assert(SRsrcIdx != -1 &&
     521             :            AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
     522             :     (void)SRsrcIdx;
     523             :   }
     524             : 
     525       30671 :   if (TII->isFLAT(MI)) {
     526        9559 :     int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
     527        9559 :     if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
     528             :       return DataIdx;
     529             :   }
     530             : 
     531             :   return -1;
     532             : }
     533             : 
     534      247799 : int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
     535             :                                                 const MachineRegisterInfo &MRI) {
     536             :   // Helper to check for the hazard where VMEM instructions that store more than
     537             :   // 8 bytes can have their store data overwritten by the next instruction.
     538      247799 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     539             : 
     540             :   const int VALUWaitStates = 1;
     541      247799 :   int WaitStatesNeeded = 0;
     542             : 
     543      247799 :   if (!TRI->isVGPR(MRI, Def.getReg()))
     544             :     return WaitStatesNeeded;
     545      218095 :   unsigned Reg = Def.getReg();
     546     1012225 :   auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
     547      978427 :     int DataIdx = createsVALUHazard(*MI);
     548      995326 :     return DataIdx >= 0 &&
     549     1029124 :     TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
     550      218095 :   };
     551             :   int WaitStatesNeededForDef =
     552      218095 :     VALUWaitStates - getWaitStatesSince(IsHazardFn);
     553      218095 :   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
     554             : 
     555      218095 :   return WaitStatesNeeded;
     556             : }
     557             : 
     558      289727 : int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
     559             :   // This checks for the hazard where VMEM instructions that store more than
     560             :   // 8 bytes can have their store data overwritten by the next instruction.
     561      579454 :   if (!ST.has12DWordStoreHazard())
     562             :     return 0;
     563             : 
     564      225650 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
     565      225650 :   int WaitStatesNeeded = 0;
     566             : 
     567      718692 :   for (const MachineOperand &Def : VALU->defs()) {
     568      493042 :     WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
     569             :   }
     570             : 
     571      225650 :   return WaitStatesNeeded;
     572             : }
     573             : 
     574        2487 : int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
     575             :   // This checks for hazards associated with inline asm statements.
     576             :   // Since inline asm can contain just about anything, we use this
     577             :   // to call/leverage the other check*Hazards routines. Note that
     578             :   // this function doesn't attempt to address all possible inline asm
     579             :   // hazards (good luck); it is a collection of what has been
     580             :   // problematic thus far.
     581             : 
     582             :   // Same subtarget gate as checkVALUHazards(): nothing to do without the hazard.
     583        4974 :   if (!ST.has12DWordStoreHazard())
     584             :     return 0;
     585             : 
     586        1943 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
     587        1943 :   int WaitStatesNeeded = 0;
     588             :   // Walk operands from InlineAsm::MIOp_FirstOperand on; only register defs are checked.
     589        6283 :   for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
     590        6283 :        I != E; ++I) {
     591        4340 :     const MachineOperand &Op = IA->getOperand(I);
     592        6504 :     if (Op.isReg() && Op.isDef()) {
     593        2556 :       WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
     594             :     }
     595             :   }
     596             : 
     597        1943 :   return WaitStatesNeeded;
     598             : }
     599             : 
     600          35 : int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
     601          35 :   const SIInstrInfo *TII = ST.getInstrInfo();
     602             :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     603          35 :   const MachineRegisterInfo &MRI = MF.getRegInfo();
     604             :   // The lane select is RWLane's src1 operand; it is only checked when it is an SGPR.
     605          35 :   const MachineOperand *LaneSelectOp =
     606             :       TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
     607             :   // NOTE(review): LaneSelectOp is dereferenced unchecked - confirm src1 always exists here.
     608          35 :   if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
     609             :     return 0;
     610             : 
     611          32 :   unsigned LaneSelectReg = LaneSelectOp->getReg();
     612             :   auto IsHazardFn = [TII] (MachineInstr *MI) {
     613             :     return TII->isVALU(*MI);
     614          32 :   };
     615             :   // Returns 4 minus getWaitStatesSinceDef(); the difference is not clamped here.
     616             :   const int RWLaneWaitStates = 4;
     617          32 :   int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
     618          32 :   return RWLaneWaitStates - WaitStatesSince;
     619             : }
     620             : 
     621           8 : int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
     622           8 :   if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
     623             :     return 0; // Generations before VOLCANIC_ISLANDS are not checked.
     624             : 
     625             :   const SIInstrInfo *TII = ST.getInstrInfo();
     626             : 
     627             :   const int RFEWaitStates = 1;
     628             :   // The predicate matches instructions whose getHWReg() is ID_TRAPSTS.
     629           6 :   auto IsHazardFn = [TII] (MachineInstr *MI) {
     630             :     return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
     631          10 :   };
     632           4 :   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); // NOTE(review): despite the name, this holds the "since" count.
     633           4 :   return RFEWaitStates - WaitStatesNeeded; // Not clamped here; RFE itself is not read in this function.
     634             : }
     635             : 
     636      654637 : int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
     637      654637 :   if (MI->isDebugValue())
     638             :     return 0; // Debug values are never checked.
     639             : 
     640      654616 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     641      654616 :   if (!ST.hasSMovFedHazard())
     642             :     return 0;
     643             : 
     644             :   // Check for any instruction reading an SGPR after a write from
     645             :   // s_mov_fed_b32.
     646             :   int MovFedWaitStates = 1;
     647       97151 :   int WaitStatesNeeded = 0;
     648             :   // Skip non-register uses and VGPRs (the filter is "not a VGPR"); keep the max over the rest.
     649      835671 :   for (const MachineOperand &Use : MI->uses()) {
     650      369260 :     if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
     651      210767 :       continue;
     652             :     auto IsHazardFn = [] (MachineInstr *MI) { // This MI is the lambda's own parameter; it shadows the outer MI.
     653      657902 :       return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
     654      657902 :     };
     655             :     int WaitStatesNeededForUse =
     656      158493 :         MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
     657      158493 :     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     658             :   }
     659             : 
     660       97151 :   return WaitStatesNeeded;
     661             : }
     662             : 
     663          95 : int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
     664          95 :   const SIInstrInfo *TII = ST.getInstrInfo();
     665             :   const int SMovRelWaitStates = 1;
     666             :   auto IsHazardFn = [TII] (MachineInstr *MI) { // Lambda parameter shadows the outer MI, which is never read.
     667             :     return TII->isSALU(*MI);
     668          95 :   };
     669          95 :   return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn); // 1 minus the "since def of M0" count; not clamped here.
     670             : }

Generated by: LCOV version 1.13