LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600EmitClauseMarkers.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 84 124 67.7 %
Date: 2018-10-20 13:21:21 Functions: 9 13 69.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
      12             : /// 128 ALU instructions; these instructions can access up to 4 prefetched
      13             : /// lines of 16 registers from constant buffers. Such ALU clauses are
      14             : /// initiated by CF_ALU instructions.
      15             : //===----------------------------------------------------------------------===//
      16             : 
      17             : #include "AMDGPU.h"
      18             : #include "AMDGPUSubtarget.h"
      19             : #include "R600Defines.h"
      20             : #include "R600InstrInfo.h"
      21             : #include "R600RegisterInfo.h"
      22             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      23             : #include "llvm/ADT/SmallVector.h"
      24             : #include "llvm/ADT/StringRef.h"
      25             : #include "llvm/CodeGen/MachineBasicBlock.h"
      26             : #include "llvm/CodeGen/MachineFunction.h"
      27             : #include "llvm/CodeGen/MachineFunctionPass.h"
      28             : #include "llvm/CodeGen/MachineInstr.h"
      29             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      30             : #include "llvm/CodeGen/MachineOperand.h"
      31             : #include "llvm/Pass.h"
      32             : #include "llvm/Support/ErrorHandling.h"
      33             : #include <cassert>
      34             : #include <cstdint>
      35             : #include <utility>
      36             : #include <vector>
      37             : 
      38             : using namespace llvm;
      39             : 
      40             : namespace llvm {
      41             : 
      42             :   void initializeR600EmitClauseMarkersPass(PassRegistry&); // Forward declaration; called from the R600EmitClauseMarkers constructor.
      43             : 
      44             : } // end namespace llvm
      45             : 
      46             : namespace {
      47             : 
      48             : class R600EmitClauseMarkers : public MachineFunctionPass {
      49             : private:
      50             :   const R600InstrInfo *TII = nullptr;
      51             :   int Address = 0;
      52             : 
      53           0 :   unsigned OccupiedDwords(MachineInstr &MI) const { // Returns how much MI adds to a clause's running instruction count (AluInstCount in MakeALUClause).
      54           0 :     switch (MI.getOpcode()) {
      55             :     case R600::INTERP_PAIR_XY:
      56             :     case R600::INTERP_PAIR_ZW:
      57             :     case R600::INTERP_VEC_LOAD:
      58             :     case R600::DOT_4:
      59             :       return 4;
      60           0 :     case R600::KILL:
      61           0 :       return 0; // KILL contributes nothing to the count.
      62             :     default:
      63             :       break;
      64             :     }
      65             : 
      66             :     // Instructions matched by isLDSRetInstr will be expanded to two ALU
      67             :     // instructions in the ExpandSpecialInstructions pass, so count them as two.
      68           0 :     if (TII->isLDSRetInstr(MI.getOpcode()))
      69           0 :       return 2;
      70             : 
      71           0 :     if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
      72           0 :         TII->isReductionOp(MI.getOpcode()))
      73           0 :       return 4; // Vector, cube and reduction ops are charged four, like the opcodes in the switch above.
      74             : 
      75             :     unsigned NumLiteral = 0; // Number of register operands equal to ALU_LITERAL_X.
      76           0 :     for (MachineInstr::mop_iterator It = MI.operands_begin(),
      77           0 :                                     E = MI.operands_end();
      78           0 :          It != E; ++It) {
      79             :       MachineOperand &MO = *It;
      80           0 :       if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
      81           0 :         ++NumLiteral;
      82             :     }
      83           0 :     return 1 + NumLiteral; // One for the instruction itself plus one per literal operand.
      84             :   }
      85             : 
      86           0 :   bool isALU(const MachineInstr &MI) const { // True if MI is treated as an ALU-clause instruction; MakeALUClause and runOnMachineFunction use this to find clause boundaries.
      87           0 :     if (TII->isALUInstr(MI.getOpcode()))
      88           0 :       return true;
      89           0 :     if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
      90           0 :       return true;
      91           0 :     switch (MI.getOpcode()) { // Opcodes accepted explicitly, in addition to the TII queries above.
      92             :     case R600::PRED_X:
      93             :     case R600::INTERP_PAIR_XY:
      94             :     case R600::INTERP_PAIR_ZW:
      95             :     case R600::INTERP_VEC_LOAD:
      96             :     case R600::COPY:
      97             :     case R600::DOT_4:
      98             :       return true;
      99           0 :     default:
     100           0 :       return false;
     101             :     }
     102             :   }
     103             : 
     104           0 :   bool IsTrivialInst(MachineInstr &MI) const { // True for KILL, RETURN and IMPLICIT_DEF; MakeALUClause steps over these without ending the clause.
     105      105746 :     switch (MI.getOpcode()) {
     106             :     case R600::KILL:
     107             :     case R600::RETURN:
     108             :     case R600::IMPLICIT_DEF:
     109             :       return true;
     110           0 :     default:
     111           0 :       return false;
     112             :     }
     113             :   }
     114             : 
     115           0 :   std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { // Decodes a constant selector into (KC_BANK, even line number).
     116             :     // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2; the low two bits are
     117             :     // read as the channel by SubstituteKCacheBank (see also R600ISelLowering.cpp).
     118             :     // ConstIndex value is in [0, 4095].
     119             :     return std::pair<unsigned, unsigned>(
     120       12190 :         ((Sel >> 2) - 512) >> 12, // KC_BANK
     121             :         // Line number of ConstIndex.
     122             :         // A line contains 16 constant registers; however, a KCX bank can lock
     123             :         // two lines at the same time, thus we want to get an even line number.
     124             :         // The line number can be retrieved with (>>4); using (>>5) << 1 instead
     125             :         // yields an even number.
     126       12190 :         ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
     127             :   }
     128             : 
     129             :   bool // Tries to map every ALU_CONST source of MI onto one of at most two KCache slots recorded in CachedConsts.
     130       49441 :   SubstituteKCacheBank(MachineInstr &MI,
     131             :                        std::vector<std::pair<unsigned, unsigned>> &CachedConsts, // In/out: (bank, line) pairs locked so far; may grow even when UpdateInstr is false or the call fails.
     132             :                        bool UpdateInstr = true) const { // When false, MI's operands are left untouched. Returns false if a third (bank, line) pair would be needed.
     133             :     std::vector<std::pair<unsigned, unsigned>> UsedKCache; // Per ALU_CONST source: (slot 0 or 1, KCacheIndex), in source order.
     134             : 
     135       98882 :     if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
     136             :       return true; // Nothing to substitute for instructions that are neither ALU instructions nor DOT_4.
     137             : 
     138             :     const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
     139       49439 :         TII->getSrcs(MI); // NOTE(review): the assert below repeats the early-return condition above, so it cannot fire.
     140             :     assert(
     141             :         (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
     142             :         "Can't assign Const");
     143      145369 :     for (unsigned i = 0, n = Consts.size(); i < n; ++i) { // Pass 1: pick a slot for each ALU_CONST source.
     144      191860 :       if (Consts[i].first->getReg() != R600::ALU_CONST)
     145             :         continue;
     146       12190 :       unsigned Sel = Consts[i].second;
     147       12190 :       unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; // Chan: low two bits of Sel; Index: position within the 32-register (two-line) window.
     148       12190 :       unsigned KCacheIndex = Index * 4 + Chan; // Register index later passed to R600_KC0RegClass / R600_KC1RegClass.
     149       12190 :       const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
     150       12190 :       if (CachedConsts.empty()) { // First constant seen: it claims slot 0.
     151        3319 :         CachedConsts.push_back(BankLine);
     152        3319 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
     153        3319 :         continue;
     154             :       }
     155        8871 :       if (CachedConsts[0] == BankLine) { // Already locked in slot 0.
     156        8871 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
     157        8871 :         continue;
     158             :       }
     159           0 :       if (CachedConsts.size() == 1) { // Slot 1 is still free: claim it.
     160           0 :         CachedConsts.push_back(BankLine);
     161           0 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
     162           0 :         continue;
     163             :       }
     164           0 :       if (CachedConsts[1] == BankLine) { // Already locked in slot 1.
     165           0 :         UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
     166           0 :         continue;
     167             :       }
     168             :       return false; // Both slots hold a different (bank, line) pair.
     169             :     }
     170             : 
     171       49439 :     if (!UpdateInstr)
     172             :       return true; // Dry run: CachedConsts has been updated but MI is not rewritten.
     173             : 
     174      144201 :     for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { // Pass 2: rewrite operands; j walks UsedKCache in the order pass 1 filled it.
     175      190692 :       if (Consts[i].first->getReg() != R600::ALU_CONST)
     176             :         continue;
     177       24380 :       switch(UsedKCache[j].first) {
     178       12190 :       case 0:
     179       24380 :         Consts[i].first->setReg(
     180             :             R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
     181       12190 :         break;
     182           0 :       case 1:
     183           0 :         Consts[i].first->setReg(
     184             :             R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
     185           0 :         break;
     186           0 :       default:
     187           0 :         llvm_unreachable("Wrong Cache Line");
     188             :       }
     189       12190 :       j++;
     190             :     }
     191             :     return true;
     192             :   }
     193             : 
     194       48857 :   bool canClauseLocalKillFitInClause( // Checks that a clause-local register defined by Def can reach its last use before the clause limits are hit.
     195             :                         unsigned AluInstCount, // Caller's running count; only this local copy is advanced here.
     196             :                         std::vector<std::pair<unsigned, unsigned>> KCacheBanks, // Taken by value: the dry-run SubstituteKCacheBank call below appends to this copy, not to the caller's vector.
     197             :                         MachineBasicBlock::iterator Def,
     198             :                         MachineBasicBlock::iterator BBEnd) {
     199       48857 :     const R600RegisterInfo &TRI = TII->getRegisterInfo();
     200             :     //TODO: change this to defs?
     201      904995 :     for (MachineInstr::const_mop_iterator
     202       48857 :            MOI = Def->operands_begin(),
     203      953852 :            MOE = Def->operands_end(); MOI != MOE; ++MOI) {
     204      951569 :       if (!MOI->isReg() || !MOI->isDef() ||
     205       46282 :           TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
     206             :         continue; // Only register defs that may not live across clauses are of interest.
     207             : 
     208             :       // Def defines a clause local register, so check that its use will fit
     209             :       // in the clause.
     210             :       unsigned LastUseCount = 0; // AluInstCount at the most recent instruction that reads the register; 0 means none seen.
     211         584 :       for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) { // The scan starts at Def itself, so Def's own size is counted too.
     212         584 :         AluInstCount += OccupiedDwords(*UseI);
     213             :         // Make sure we won't need to end the clause due to KCache limitations.
     214         584 :         if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
     215             :           return false;
     216             : 
     217             :         // We have reached the maximum instruction limit before finding the
     218             :         // use that kills this register, so we cannot use this def in the
     219             :         // current clause.
     220         584 :         if (AluInstCount >= TII->getMaxAlusPerClause())
     221             :           return false;
     222             : 
     223             :         // TODO: Is this true? kill flag appears to work OK below
     224             :         // Register kill flags have been cleared by the time we get to this
     225             :         // pass, but it is safe to assume that all uses of this register
     226             :         // occur in the same basic block as its definition, because
     227             :         // it is illegal for the scheduler to schedule them in
     228             :         // different blocks.
     229         584 :         if (UseI->readsRegister(MOI->getReg()))
     230             :           LastUseCount = AluInstCount;
     231             : 
     232             :         // Exit early if the current use kills the register
     233         584 :         if (UseI != Def && UseI->killsRegister(MOI->getReg()))
     234             :           break;
     235             :       }
     236         292 :       if (LastUseCount) // The function returns at the first clause-local def; later operands of Def are not examined.
     237         292 :         return LastUseCount <= TII->getMaxAlusPerClause(); // NOTE(review): LastUseCount is only set after the '>=' check in the loop passed, so this comparison is always true here.
     238           0 :       llvm_unreachable("Clause local register live at end of clause."); // Reached when no reading instruction was recorded (LastUseCount stayed 0).
     239             :     }
     240             :     return true; // Def has no clause-local register defs.
     241             :   }
     242             : 
     243             :   MachineBasicBlock::iterator // Builds one ALU clause starting at I and returns the iterator at which scanning stopped.
     244        3962 :   MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // The CF_ALU / CF_ALU_PUSH_BEFORE marker is inserted before the instruction I pointed to on entry.
     245             :     MachineBasicBlock::iterator ClauseHead = I;
     246             :     std::vector<std::pair<unsigned, unsigned>> KCacheBanks; // (bank, line) pairs locked for this clause; at most two entries.
     247             :     bool PushBeforeModifier = false; // Set when a PRED_X carrying MO_FLAG_PUSH opens the clause.
     248             :     unsigned AluInstCount = 0; // Running size of the clause; emitted as the COUNT operand.
     249       53205 :     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
     250             :       if (IsTrivialInst(*I))
     251             :         continue; // Stepped over without being counted.
     252       52573 :       if (!isALU(*I))
     253             :         break;
     254       49254 :       if (AluInstCount > TII->getMaxAlusPerClause()) // NOTE(review): '>' here versus '>=' in canClauseLocalKillFitInClause against the same limit; confirm which bound is intended.
     255             :         break;
     256       98060 :       if (I->getOpcode() == R600::PRED_X) {
     257             :         // We put PRED_X in its own clause to ensure that ifcvt won't create
     258             :         // clauses with more than 128 insts.
     259             :         // IfCvt is indeed checking that "then" and "else" branches of an if
     260             :         // statement have less than ~60 insts thus converted clauses can't be
     261             :         // bigger than ~121 insts (predicate setter needs to be in the same
     262             :         // clause as predicated alus).
     263         169 :         if (AluInstCount > 0)
     264             :           break; // A PRED_X may only open a clause, so end the current one here.
     265          86 :         if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
     266             :           PushBeforeModifier = true;
     267             :         AluInstCount ++;
     268          86 :         continue;
     269             :       }
     270             :       // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
     271             :       //
     272             :       // * KILL or INTERP instructions
     273             :       // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
     274             :       // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
     275             :       //
     276             :       // XXX: These checks have not been implemented yet.
     277       48861 :       if (TII->mustBeLastInClause(I->getOpcode())) {
     278             :         I++; // NOTE(review): the instruction is stepped past but never added to AluInstCount nor run through SubstituteKCacheBank; confirm this is intended.
     279             :         break;
     280             :       }
     281             : 
     282             :       // If this instruction defines a clause local register, make sure
     283             :       // its use can fit in this clause.
     284       97714 :       if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
     285             :         break;
     286             : 
     287       48857 :       if (!SubstituteKCacheBank(*I, KCacheBanks)) // Rewrites ALU_CONST operands of *I; fails when a third (bank, line) pair would be needed.
     288             :         break;
     289       48857 :       AluInstCount += OccupiedDwords(*I);
     290             :     }
     291        3962 :     unsigned Opcode = PushBeforeModifier ?
     292             :         R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
     293        3962 :     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
     294             :     // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
     295             :     // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
     296             :     // pass may assume that identical ALU clause starter at the beginning of a
     297             :     // true and false branch can be factorized which is not the case.
     298        3962 :         .addImm(Address++) // ADDR
     299        3962 :         .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
     300        7924 :         .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
     301        3962 :         .addImm(KCacheBanks.empty()?0:2) // KM0 (2 is presumably the "lock two lines" mode; confirm against the ISA docs)
     302        7924 :         .addImm((KCacheBanks.size() < 2)?0:2) // KM1
     303        3962 :         .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
     304        7924 :         .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
     305        3962 :         .addImm(AluInstCount) // COUNT
     306             :         .addImm(1); // Enabled
     307        3962 :     return I;
     308             :   }
     309             : 
     310             : public:
     311             :   static char ID;
     312             : 
     313         282 :   R600EmitClauseMarkers() : MachineFunctionPass(ID) { // Runs the pass initializer against the global PassRegistry on construction.
     314         282 :     initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
     315         282 :   }
     316             : 
     317        2297 :   bool runOnMachineFunction(MachineFunction &MF) override { // Walks every basic block of MF and wraps each run of ALU instructions in a clause via MakeALUClause.
     318        2297 :     const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
     319        2297 :     TII = ST.getInstrInfo(); // Cached for the helper methods, which all read TII.
     320             : 
     321             :     for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
     322        4765 :                                                     BB != BB_E; ++BB) {
     323             :       MachineBasicBlock &MBB = *BB;
     324             :       MachineBasicBlock::iterator I = MBB.begin();
     325        2468 :       if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) // NOTE(review): only CF_ALU is tested, not CF_ALU_PUSH_BEFORE; confirm that is sufficient.
     326             :         continue; // BB was already parsed
     327       13878 :       for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
     328       11410 :         if (isALU(*I)) {
     329        3962 :           auto next = MakeALUClause(MBB, I); // Returns the position where the clause scan stopped.
     330             :           assert(next != I); // Guards forward progress: this loop has no other increment on this branch.
     331        3962 :           I = next;
     332             :         } else
     333             :           ++I;
     334             :       }
     335             :     }
     336        2297 :     return false; // NOTE(review): returned unconditionally although MakeALUClause inserts instructions and rewrites operands; confirm whether a change should be reported.
     337             :   }
     338             : 
     339         282 :   StringRef getPassName() const override { // Human-readable name of this pass.
     340         282 :     return "R600 Emit Clause Markers Pass";
     341         282 :   }
     342             : };
     343             : 
     344             : char R600EmitClauseMarkers::ID = 0; // Definition of the static member handed to the MachineFunctionPass constructor.
     345             : 
     346             : } // end anonymous namespace
     347             : 
     348         282 : INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", // Pass class, then its short argument name.
     349             :                       "R600 Emit Clause Markers", false, false) // Description string; must match the one in INITIALIZE_PASS_END below.
     350         564 : INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
     351             :                       "R600 Emit Clause Markers", false, false)
     352             : 
     353         282 : FunctionPass *llvm::createR600EmitClauseMarkers() { // Factory: returns a newly heap-allocated pass instance; the caller owns it (presumably the pass manager, confirm).
     354         282 :   return new R600EmitClauseMarkers();
     355         282 : }

Generated by: LCOV version 1.13