LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600EmitClauseMarkers.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 123 140 87.9 %
Date: 2017-09-14 15:23:50 Functions: 12 13 92.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
      12             : /// 128 ALU instructions; these instructions can access up to 4 prefetched
      13             : /// lines of 16 registers from constant buffers. Such ALU clauses are
      14             : /// initiated by CF_ALU instructions.
      15             : //===----------------------------------------------------------------------===//
      16             : 
      17             : #include "AMDGPU.h"
      18             : #include "AMDGPUSubtarget.h"
      19             : #include "R600Defines.h"
      20             : #include "R600InstrInfo.h"
      21             : #include "R600RegisterInfo.h"
      22             : #include "llvm/ADT/SmallVector.h"
      23             : #include "llvm/ADT/StringRef.h"
      24             : #include "llvm/CodeGen/MachineBasicBlock.h"
      25             : #include "llvm/CodeGen/MachineFunction.h"
      26             : #include "llvm/CodeGen/MachineFunctionPass.h"
      27             : #include "llvm/CodeGen/MachineInstr.h"
      28             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      29             : #include "llvm/CodeGen/MachineOperand.h"
      30             : #include "llvm/Pass.h"
      31             : #include "llvm/Support/ErrorHandling.h"
      32             : #include <cassert>
      33             : #include <cstdint>
      34             : #include <utility>
      35             : #include <vector>
      36             : 
      37             : using namespace llvm;
      38             : 
      39             : namespace llvm {
      40             : 
                         // Forward declaration so the pass constructor further down in this file
                         // can call the registration hook.
                         // NOTE(review): presumably defined by the INITIALIZE_PASS_* macros at
                         // the end of the file -- confirm.
      41             :   void initializeR600EmitClauseMarkersPass(PassRegistry&);
      42             : 
      43             : } // end namespace llvm
      44             : 
      45             : namespace {
      46             : 
      47         243 : class R600EmitClauseMarkers : public MachineFunctionPass {
      48             : private:
      49             :   const R600InstrInfo *TII = nullptr;
      50             :   int Address = 0;
      51             : 
      52       47247 :   unsigned OccupiedDwords(MachineInstr &MI) const {
                           // Returns the size this pass charges MI against the current ALU clause
                           // (callers in this class accumulate the result into AluInstCount):
                           //   - 4 for the INTERP_* and DOT_4 opcodes listed below,
                           //   - 0 for KILL,
                           //   - 2 for LDS-return instructions,
                           //   - 4 for vector, cube and reduction operations,
                           //   - otherwise 1 plus one per ALU_LITERAL_X register operand.
      53       94494 :     switch (MI.getOpcode()) {
      54             :     case AMDGPU::INTERP_PAIR_XY:
      55             :     case AMDGPU::INTERP_PAIR_ZW:
      56             :     case AMDGPU::INTERP_VEC_LOAD:
      57             :     case AMDGPU::DOT_4:
      58             :       return 4;
      59           0 :     case AMDGPU::KILL:
      60             :       return 0;
      61             :     default:
      62             :       break;
      63             :     }
      64             : 
      65             :     // These will be expanded to two ALU instructions in the
      66             :     // ExpandSpecialInstructions pass.
      67       47215 :     if (TII->isLDSRetInstr(MI.getOpcode()))
      68             :       return 2;
      69             : 
      70      139136 :     if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
      71       92730 :         TII->isReductionOp(MI.getOpcode()))
      72             :       return 4;
      73             : 
                           // Generic case: scan every operand and count those that are the
                           // ALU_LITERAL_X register; each one adds 1 to the returned size.
      74       46365 :     unsigned NumLiteral = 0;
      75      917315 :     for (MachineInstr::mop_iterator It = MI.operands_begin(),
      76       92730 :                                     E = MI.operands_end();
      77      917315 :          It != E; ++It) {
      78      870950 :       MachineOperand &MO = *It;
      79      870950 :       if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
      80       20104 :         ++NumLiteral;
      81             :     }
      82       46365 :     return 1 + NumLiteral;
      83             :   }
      84             : 
      85       60546 :   bool isALU(const MachineInstr &MI) const {
                           // Returns true when MI is treated as an ALU-clause instruction by this
                           // pass: TII reports it as an ALU instruction, a vector instruction or
                           // a cube op, or its opcode is one of the pseudo/special opcodes
                           // enumerated in the switch below. Everything else returns false.
      86      121092 :     if (TII->isALUInstr(MI.getOpcode()))
      87             :       return true;
      88       20076 :     if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
      89             :       return true;
      90       20070 :     switch (MI.getOpcode()) {
      91             :     case AMDGPU::PRED_X:
      92             :     case AMDGPU::INTERP_PAIR_XY:
      93             :     case AMDGPU::INTERP_PAIR_ZW:
      94             :     case AMDGPU::INTERP_VEC_LOAD:
      95             :     case AMDGPU::COPY:
      96             :     case AMDGPU::DOT_4:
      97             :       return true;
      98        9749 :     default:
      99             :       return false;
     100             :     }
     101             :   }
     102             : 
     103             :   bool IsTrivialInst(MachineInstr &MI) const {
                           // Returns true for KILL, RETURN and IMPLICIT_DEF. MakeALUClause steps
                           // over such instructions without ending the clause and without adding
                           // anything to the clause's instruction count.
     104       50461 :     switch (MI.getOpcode()) {
     105             :     case AMDGPU::KILL:
     106             :     case AMDGPU::RETURN:
     107             :     case AMDGPU::IMPLICIT_DEF:
     108             :       return true;
     109             :     default:
     110             :       return false;
     111             :     }
     112             :   }
     113             : 
     114             :   std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
                           // Decodes a constant selector into the pair
                           // (constant-cache bank, even line number) that must be locked to
                           // read it.
                           // Example: Sel = (512 + (1 << 12) + 37) << 2 yields (1, 2).
     115             :     // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
     116             :     // (See also R600ISelLowering.cpp)
     117             :     // ConstIndex value is in [0, 4095];
     118       11586 :     return std::pair<unsigned, unsigned>(
     119       11586 :         ((Sel >> 2) - 512) >> 12, // KC_BANK
     120             :         // Line number of ConstIndex.
     121             :         // A line contains 16 constant registers; however, a KCX bank can lock
     122             :         // two lines at the same time, so we want an even line number.
     123             :         // The line number can be retrieved with (>> 4); using (>> 5) << 1
     124             :         // instead rounds it down to an even number.
     125       11586 :         ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
     126             :   }
     127             : 
     128             :   bool
     129       47247 :   SubstituteKCacheBank(MachineInstr &MI,
     130             :                        std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
     131             :                        bool UpdateInstr = true) const {
                           // Tries to serve every ALU_CONST source of MI from at most two locked
                           // constant-cache (bank, line) entries.
                           //
                           // MI            instruction to inspect (and rewrite if UpdateInstr).
                           // CachedConsts  in/out list of the (bank, line) pairs locked so far for
                           //               the clause; holds at most two entries. New entries are
                           //               appended as needed, and entries appended while
                           //               processing earlier operands stay in place even when
                           //               the function later returns false or UpdateInstr is
                           //               false.
                           // UpdateInstr   when true, each ALU_CONST source register is replaced
                           //               by a register from R600_KC0RegClass or
                           //               R600_KC1RegClass; when false this is a feasibility
                           //               check only.
                           //
                           // Returns false if some constant needs a third distinct (bank, line);
                           // returns true otherwise, including for instructions that are neither
                           // ALU instructions nor DOT_4.
                           //
                           // UsedKCache records, per ALU_CONST source in operand order, the pair
                           // (cache slot 0 or 1, register index inside that slot).
     132       94494 :     std::vector<std::pair<unsigned, unsigned>> UsedKCache;
     133             : 
     134       94528 :     if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
     135             :       return true;
     136             : 
     137             :     const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
     138       47245 :         TII->getSrcs(MI);
     139             :     assert(
     140             :         (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) &&
     141             :         "Can't assign Const");
                           // First pass: decide which cache slot each constant source uses.
     142      186789 :     for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
     143      184598 :       if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
     144      173012 :         continue;
     145       23172 :       unsigned Sel = Consts[i].second;
                             // Chan is the low two bits of Sel; Index is the constant's position
                             // within a 32-register window (the two lines one slot can lock).
     146       11586 :       unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
     147       11586 :       unsigned KCacheIndex = Index * 4 + Chan;
     148       23172 :       const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
                             // Slot 0: claim it if nothing is locked yet, or reuse it on a match.
     149       14658 :       if (CachedConsts.empty()) {
     150        3072 :         CachedConsts.push_back(BankLine);
     151        9216 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
     152        3072 :         continue;
     153             :       }
     154       25542 :       if (CachedConsts[0] == BankLine) {
     155       25542 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
     156        8514 :         continue;
     157             :       }
                             // Slot 1: same policy as slot 0.
     158           0 :       if (CachedConsts.size() == 1) {
     159           0 :         CachedConsts.push_back(BankLine);
     160           0 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
     161           0 :         continue;
     162             :       }
     163           0 :       if (CachedConsts[1] == BankLine) {
     164           0 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
     165           0 :         continue;
     166             :       }
                             // Both slots are taken by other (bank, line) pairs.
     167             :       return false;
     168             :     }
     169             : 
     170       47245 :     if (!UpdateInstr)
     171             :       return true;
     172             : 
                           // Second pass: rewrite each ALU_CONST source. j walks UsedKCache in
                           // step with the ALU_CONST sources, which were recorded in the same
                           // order by the first pass.
     173      185397 :     for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
     174      183670 :       if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
     175       80249 :         continue;
     176       23172 :       switch(UsedKCache[j].first) {
     177       11586 :       case 0:
     178       46344 :         Consts[i].first->setReg(
     179       11586 :             AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
     180       11586 :         break;
     181           0 :       case 1:
     182           0 :         Consts[i].first->setReg(
     183           0 :             AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
     184           0 :         break;
     185           0 :       default:
     186           0 :         llvm_unreachable("Wrong Cache Line");
     187             :       }
     188       11586 :       j++;
     189             :     }
     190             :     return true;
     191             :   }
     192             : 
     193       46783 :   bool canClauseLocalKillFitInClause(
     194             :                         unsigned AluInstCount,
     195             :                         std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
     196             :                         MachineBasicBlock::iterator Def,
     197             :                         MachineBasicBlock::iterator BBEnd) {
                           // Checks whether the instruction at Def can be placed in the current
                           // clause given the registers it defines.
                           //
                           // AluInstCount  size of the clause so far; passed by value, so the
                           //               additions below are local to this call.
                           // KCacheBanks   (bank, line) pairs locked so far; passed by value, so
                           //               the trial SubstituteKCacheBank calls below only touch
                           //               this copy.
                           // Def           instruction under consideration.
                           // BBEnd         end of the scan range.
                           //
                           // Operands that are not register defs, or whose register
                           // TRI.isPhysRegLiveAcrossClauses() accepts, are skipped; if none
                           // remain the result is true. For the first remaining operand the
                           // function scans forward from Def (inclusive) and returns from inside
                           // that iteration, so later operands are never examined.
     198       93566 :     const R600RegisterInfo &TRI = TII->getRegisterInfo();
     199             :     //TODO: change this to defs?
     200      869598 :     for (MachineInstr::const_mop_iterator
     201       46783 :            MOI = Def->operands_begin(),
     202       93566 :            MOE = Def->operands_end(); MOI != MOE; ++MOI) {
     203     1097149 :       if (!MOI->isReg() || !MOI->isDef() ||
     204       44301 :           TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
     205             :         continue;
     206             : 
     207             :       // Def defines a clause local register, so check that its use will fit
     208             :       // in the clause.
                             // LastUseCount is the running clause size at the latest instruction
                             // seen reading the register; 0 means no reader was found.
     209         232 :       unsigned LastUseCount = 0;
     210         696 :       for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
     211         464 :         AluInstCount += OccupiedDwords(*UseI);
     212             :         // Make sure we won't need to end the clause due to KCache limitations.
     213         464 :         if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
     214           0 :           return false;
     215             : 
     216             :         // We have reached the maximum instruction limit before finding the
     217             :         // use that kills this register, so we cannot use this def in the
     218             :         // current clause.
     219         464 :         if (AluInstCount >= TII->getMaxAlusPerClause())
     220             :           return false;
     221             : 
     222             :         // TODO: Is this true? kill flag appears to work OK below
     223             :         // Register kill flags have been cleared by the time we get to this
     224             :         // pass, but it is safe to assume that all uses of this register
     225             :         // occur in the same basic block as its definition, because
     226             :         // it is illegal for the scheduler to schedule them in
     227             :         // different blocks.
     228         928 :         if (UseI->readsRegister(MOI->getReg()))
     229         232 :           LastUseCount = AluInstCount;
     230             : 
     231             :         // Exit early if the current use kills the register
     232         928 :         if (UseI != Def && UseI->killsRegister(MOI->getReg()))
     233             :           break;
     234             :       }
                             // A reader was found: the def fits if the clause size at the last
                             // recorded read does not exceed the per-clause maximum.
     235         232 :       if (LastUseCount)
     236         232 :         return LastUseCount <= TII->getMaxAlusPerClause();
     237           0 :       llvm_unreachable("Clause local register live at end of clause.");
     238             :     }
     239             :     return true;
     240             :   }
     241             : 
     242             :   MachineBasicBlock::iterator
     243        3623 :   MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
                           // Builds one ALU clause starting at I: walks forward collecting
                           // instructions until one of the stop conditions below is hit, then
                           // inserts a CF_ALU (or CF_ALU_PUSH_BEFORE) marker in front of the first
                           // collected instruction. Returns the iterator at which the walk
                           // stopped, i.e. where the caller should resume scanning.
                           //
                           // KCacheBanks accumulates the (bank, line) pairs locked for the clause
                           // and AluInstCount its size; both end up as operands of the marker.
     244        3623 :     MachineBasicBlock::iterator ClauseHead = I;
     245        7246 :     std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
     246        3623 :     bool PushBeforeModifier = false;
     247        3623 :     unsigned AluInstCount = 0;
     248       54407 :     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
                             // Trivial instructions stay inside the clause range but are not
                             // counted.
     249      100922 :       if (IsTrivialInst(*I))
     250         294 :         continue;
     251       50167 :       if (!isALU(*I))
     252             :         break;
     253       47174 :       if (AluInstCount > TII->getMaxAlusPerClause())
     254             :         break;
     255       93988 :       if (I->getOpcode() == AMDGPU::PRED_X) {
     256             :         // We put PRED_X in its own clause to ensure that ifcvt won't create
     257             :         // clauses with more than 128 insts.
     258             :         // IfCvt is indeed checking that "then" and "else" branches of an if
     259             :         // statement have less than ~60 insts thus converted clauses can't be
     260             :         // bigger than ~121 insts (predicate setter needs to be in the same
     261             :         // clause as predicated alus).
     262         165 :         if (AluInstCount > 0)
     263             :           break;
     264         168 :         if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
     265          82 :           PushBeforeModifier = true;
     266          84 :         AluInstCount ++;
     267          84 :         continue;
     268             :       }
     269             :       // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
     270             :       //
     271             :       // * KILL or INTERP instructions
     272             :       // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
     273             :       // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
     274             :       //
     275             :       // XXX: These checks have not been implemented yet.
                             // Step past the instruction so it is inside the clause range, then
                             // stop.
                             // NOTE(review): this path skips the constant-cache substitution and
                             // does not add the instruction to AluInstCount -- confirm intended.
     276       93574 :       if (TII->mustBeLastInClause(I->getOpcode())) {
     277           4 :         I++;
     278             :         break;
     279             :       }
     280             : 
     281             :       // If this instruction defines a clause local register, make sure
     282             :       // its use can fit in this clause.
     283       93566 :       if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
     284             :         break;
     285             : 
                             // Rewrites the constant sources of *I and may append to KCacheBanks;
                             // a false result means a third (bank, line) would be needed, which
                             // ends the clause here.
     286       46783 :       if (!SubstituteKCacheBank(*I, KCacheBanks))
     287             :         break;
     288       46783 :       AluInstCount += OccupiedDwords(*I);
     289             :     }
     290        3623 :     unsigned Opcode = PushBeforeModifier ?
     291             :         AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
     292       18115 :     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
     293             :     // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
     294             :     // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
     295             :     // pass may assume that identical ALU clause starter at the beginning of a
     296             :     // true and false branch can be factorized which is not the case.
     297        7246 :         .addImm(Address++) // ADDR
     298        7246 :         .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
     299       10869 :         .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
     300        7246 :         .addImm(KCacheBanks.empty()?0:2) // KM0
     301       10869 :         .addImm((KCacheBanks.size() < 2)?0:2) // KM1
     302        7246 :         .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
     303       10869 :         .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
     304        7246 :         .addImm(AluInstCount) // COUNT
     305        3623 :         .addImm(1); // Enabled
     306        7246 :     return I;
     307             :   }
     308             : 
     309             : public:
     310             :   static char ID;
     311             : 
     312         244 :   R600EmitClauseMarkers() : MachineFunctionPass(ID) {
                           // Calls the pass registration hook against the global PassRegistry
                           // each time a pass object is constructed.
     313         244 :     initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
     314         244 :   }
     315             : 
     316        2057 :   bool runOnMachineFunction(MachineFunction &MF) override {
                           // Pass entry point: caches the R600 instruction info in TII, then scans
                           // every basic block and calls MakeALUClause at each instruction for
                           // which isALU() is true, resuming the scan where the clause ended.
                           // NOTE(review): always returns false although MakeALUClause inserts
                           // instructions -- confirm this is the intended "modified" result.
     317        2057 :     const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
     318        2057 :     TII = ST.getInstrInfo();
     319             : 
     320        4114 :     for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
     321        4277 :                                                     BB != BB_E; ++BB) {
     322        2220 :       MachineBasicBlock &MBB = *BB;
     323        2220 :       MachineBasicBlock::iterator I = MBB.begin();
                             // NOTE(review): only CF_ALU is checked here; a block whose first
                             // marker is CF_ALU_PUSH_BEFORE would not be recognised as processed
                             // -- confirm.
     324        8874 :       if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
     325           0 :         continue; // BB was already parsed
     326       12599 :       for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
     327       10379 :         if (isALU(*I)) {
     328        3623 :           auto next = MakeALUClause(MBB, I);
                                 // The clause builder must advance, otherwise this loop would not
                                 // terminate.
     329             :           assert(next != I);
     330        3623 :           I = next;
     331             :         } else
     332             :           ++I;
     333             :       }
     334             :     }
     335        2057 :     return false;
     336             :   }
     337             : 
     338         244 :   StringRef getPassName() const override {
                           // Returns the fixed display name of this pass.
     339         244 :     return "R600 Emit Clause Markers Pass";
     340             :   }
     341             : };
     342             : 
     343             : char R600EmitClauseMarkers::ID = 0;
     344             : 
     345             : } // end anonymous namespace
     346             : 
     347         244 : INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
     348             :                       "R600 Emit Clause Markters", false, false)
                       // NOTE(review): "Markters" in the description string of both macros looks
                       // like a misspelling of "Markers"; it is runtime text, so it is left
                       // unchanged here. The pass argument string is "emitclausemarkers".
     349        1220 : INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
     350             :                       "R600 Emit Clause Markters", false, false)
     351             : 
     352         244 : FunctionPass *llvm::createR600EmitClauseMarkers() {
                         // Factory: allocates a new R600EmitClauseMarkers with `new` and returns
                         // the raw pointer; nothing in this file deletes it.
     353         244 :   return new R600EmitClauseMarkers();
     354             : }

Generated by: LCOV version 1.13