LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIOptimizeExecMaskingPreRA.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 108 113 95.6 %
Date: 2017-09-14 15:23:50 Functions: 10 12 83.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief This pass removes redundant S_OR_B64 instructions that re-enable
      12             : /// lanes in exec. If two SI_END_CF (lowered as S_OR_B64) are adjacent, with no
      13             : /// vector instructions between them, only the outer SI_END_CF needs to be kept:
      14             : /// the CFG is structured, so the exec bits restored by the outer end statement
      15             : /// always include those restored by the inner one.
      16             : ///
      17             : /// This needs to be done before RA, to eliminate the registers holding saved
      18             : /// exec bits, but after the register coalescer, so that no vector register
      19             : /// copies remain between the different end-cf statements.
      20             : ///
      21             : //===----------------------------------------------------------------------===//
      22             : 
      23             : #include "AMDGPU.h"
      24             : #include "AMDGPUSubtarget.h"
      25             : #include "SIInstrInfo.h"
      26             : #include "llvm/CodeGen/LiveIntervalAnalysis.h"
      27             : #include "llvm/CodeGen/MachineFunctionPass.h"
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : #define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
      32             : 
      33             : namespace {
      34             : 
      35        1413 : class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
      36             : public:
      37             :   static char ID;
      38             : 
      39             : public:
      40        1421 :   SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
      41        1421 :     initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
      42        1421 :   }
      43             : 
      44             :   bool runOnMachineFunction(MachineFunction &MF) override;
      45             : 
      46        1421 :   StringRef getPassName() const override {
      47        1421 :     return "SI optimize exec mask operations pre-RA";
      48             :   }
      49             : 
      50        1421 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      51        1421 :     AU.addRequired<LiveIntervals>();
      52        1421 :     AU.setPreservesAll();
      53        1421 :     MachineFunctionPass::getAnalysisUsage(AU);
      54        1421 :   }
      55             : };
      56             : 
      57             : } // End anonymous namespace.
      58             : 
      59       53042 : INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
      60             :                       "SI optimize exec mask operations pre-RA", false, false)
      61       53042 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
      62      316801 : INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
      63             :                     "SI optimize exec mask operations pre-RA", false, false)
      64             : 
      65             : char SIOptimizeExecMaskingPreRA::ID = 0;
      66             : 
      67             : char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
      68             : 
      69           0 : FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
      70           0 :   return new SIOptimizeExecMaskingPreRA();
      71             : }
      72             : 
      73        1042 : static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
      74        2179 :   return MI.getOpcode() == AMDGPU::S_OR_B64 &&
      75        1137 :          MI.modifiesRegister(AMDGPU::EXEC, TRI);
      76             : }
      77             : 
      78             : static bool isFullExecCopy(const MachineInstr& MI) {
      79           5 :   return MI.isFullCopy() && MI.getOperand(1).getReg() == AMDGPU::EXEC;
      80             : }
      81             : 
      82          10 : static unsigned getOrNonExecReg(const MachineInstr &MI,
      83             :                                 const SIInstrInfo &TII) {
      84          10 :   auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
      85          10 :   if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
      86             :      return Op->getReg();
      87           0 :   Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
      88           0 :   if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
      89             :      return Op->getReg();
      90             :   return AMDGPU::NoRegister;
      91             : }
      92             : 
      93           7 : static MachineInstr* getOrExecSource(const MachineInstr &MI,
      94             :                                      const SIInstrInfo &TII,
      95             :                                      const MachineRegisterInfo &MRI) {
      96           7 :   auto SavedExec = getOrNonExecReg(MI, TII);
      97           7 :   if (SavedExec == AMDGPU::NoRegister)
      98             :     return nullptr;
      99           7 :   auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
     100           7 :   if (!SaveExecInst || !isFullExecCopy(*SaveExecInst))
     101             :     return nullptr;
     102             :   return SaveExecInst;
     103             : }
     104             : 
     105       14673 : bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
     106       14673 :   if (skipFunction(*MF.getFunction()))
     107             :     return false;
     108             : 
     109       14672 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     110       14672 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     111       14672 :   const SIInstrInfo *TII = ST.getInstrInfo();
     112       14672 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     113       14672 :   LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
     114       29344 :   DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
     115       14672 :   bool Changed = false;
     116             : 
     117       60633 :   for (MachineBasicBlock &MBB : MF) {
     118             : 
     119             :     // Try to remove unneeded instructions before s_endpgm.
     120       16617 :     if (MBB.succ_empty()) {
     121       44028 :       if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
     122         961 :         continue;
     123             : 
     124       27470 :       SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
     125             : 
     126       27673 :       while (!Blocks.empty()) {
     127       13938 :         auto CurBB = Blocks.pop_back_val();
     128       27876 :         auto I = CurBB->rbegin(), E = CurBB->rend();
     129       13938 :         if (I != E) {
     130       41709 :           if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
     131             :             ++I;
     132         310 :           else if (I->isBranch())
     133       13310 :             continue;
     134             :         }
     135             : 
     136       14043 :         while (I != E) {
     137       26830 :           if (I->isDebugValue())
     138           0 :             continue;
     139       17097 :           if (I->mayStore() || I->isBarrier() || I->isCall() ||
     140       14290 :               I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
     141             :             break;
     142             : 
     143             :           DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n');
     144             : 
     145         614 :           for (auto &Op : I->operands()) {
     146         404 :             if (Op.isReg())
     147         750 :               RecalcRegs.insert(Op.getReg());
     148             :           }
     149             : 
     150         105 :           auto Next = std::next(I);
     151         210 :           LIS->RemoveMachineInstrFromMaps(*I);
     152         105 :           I->eraseFromParent();
     153         105 :           I = Next;
     154             : 
     155         105 :           Changed = true;
     156             :         }
     157             : 
     158       13938 :         if (I != E)
     159       13310 :           continue;
     160             : 
     161             :         // Try to ascend predecessors.
     162        1646 :         for (auto *Pred : CurBB->predecessors()) {
     163         780 :           if (Pred->succ_size() == 1)
     164         203 :             Blocks.push_back(Pred);
     165             :         }
     166             :       }
     167             :       continue;
     168             :     }
     169             : 
     170             :     // Try to collapse adjacent endifs.
     171        3842 :     auto Lead = MBB.begin(), E = MBB.end();
     172        5834 :     if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
     173        1830 :       continue;
     174             : 
     175          91 :     const MachineBasicBlock* Succ = *MBB.succ_begin();
     176         117 :     if (!MBB.isLayoutSuccessor(Succ))
     177          26 :       continue;
     178             : 
     179          65 :     auto I = std::next(Lead);
     180             : 
     181         195 :     for ( ; I != E; ++I)
     182         632 :       if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
     183             :         break;
     184             : 
     185          65 :     if (I != E)
     186          55 :       continue;
     187             : 
     188          10 :     const auto NextLead = Succ->begin();
     189          41 :     if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
     190           4 :         !getOrExecSource(*NextLead, *TII, MRI))
     191           7 :       continue;
     192             : 
     193             :     DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
     194             : 
     195           3 :     auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
     196           3 :     unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
     197          33 :     for (auto &Op : Lead->operands()) {
     198          12 :       if (Op.isReg())
     199          24 :         RecalcRegs.insert(Op.getReg());
     200             :     }
     201             : 
     202           6 :     LIS->RemoveMachineInstrFromMaps(*Lead);
     203           3 :     Lead->eraseFromParent();
     204           3 :     if (SaveExecReg) {
     205           3 :       LIS->removeInterval(SaveExecReg);
     206             :       LIS->createAndComputeVirtRegInterval(SaveExecReg);
     207             :     }
     208             : 
     209           3 :     Changed = true;
     210             : 
     211             :     // If the only use of saved exec in the removed instruction is S_AND_B64
     212             :     // fold the copy now.
     213           6 :     if (!SaveExec || !SaveExec->isFullCopy())
     214           1 :       continue;
     215             : 
     216           2 :     unsigned SavedExec = SaveExec->getOperand(0).getReg();
     217           2 :     bool SafeToReplace = true;
     218          10 :     for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
     219           2 :       if (U.getParent() != SaveExec->getParent()) {
     220             :         SafeToReplace = false;
     221             :         break;
     222             :       }
     223             : 
     224             :       DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
     225             :     }
     226             : 
     227           2 :     if (SafeToReplace) {
     228           4 :       LIS->RemoveMachineInstrFromMaps(*SaveExec);
     229           2 :       SaveExec->eraseFromParent();
     230           2 :       MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
     231           2 :       LIS->removeInterval(SavedExec);
     232             :     }
     233             :   }
     234             : 
     235       14672 :   if (Changed) {
     236        1741 :     for (auto Reg : RecalcRegs) {
     237         442 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     238          98 :         LIS->removeInterval(Reg);
     239          98 :         if (!MRI.reg_empty(Reg))
     240             :           LIS->createAndComputeVirtRegInterval(Reg);
     241             :       } else {
     242        1128 :         for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
     243         880 :           LIS->removeRegUnit(*U);
     244             :       }
     245             :     }
     246             :   }
     247             : 
     248       14672 :   return Changed;
     249             : }

Generated by: LCOV version 1.13