LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIOptimizeExecMaskingPreRA.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 94 97 96.9 %
Date: 2018-07-13 00:08:38 Functions: 10 12 83.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass removes redundant S_OR_B64 instructions that enable lanes in the
      12             : /// exec mask. If two SI_END_CF (lowered as S_OR_B64) occur back to back
      13             : /// without any vector instructions between them, only the outer SI_END_CF
      14             : /// needs to be kept, given that the CFG is structured and the exec bits of the
      15             : /// outer end statement always include at least the exec bits of the inner one.
      16             : ///
      17             : /// This needs to be done before register allocation to eliminate the
      18             : /// registers holding saved exec bits, but after the register coalescer so that
      19             : /// no vector register copies remain between different end-cf statements.
      20             : ///
      21             : //===----------------------------------------------------------------------===//
      22             : 
      23             : #include "AMDGPU.h"
      24             : #include "AMDGPUSubtarget.h"
      25             : #include "SIInstrInfo.h"
      26             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      27             : #include "llvm/CodeGen/LiveIntervals.h"
      28             : #include "llvm/CodeGen/MachineFunctionPass.h"
      29             : 
      30             : using namespace llvm;
      31             : 
      32             : #define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
      33             : 
      34             : namespace {
      35             : 
      36        1736 : class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
                       // Machine-function pass that (a) erases instructions without stores,
                       // calls, barriers or other side effects that lead up to S_ENDPGM and
                       // (b) erases an EXEC-modifying S_OR_B64 at the head of a block when the
                       // layout successor starts with another one. See runOnMachineFunction.
      37             : public:
                         /// Pass identifier; handed to the MachineFunctionPass base constructor.
      38             :   static char ID;
      39             : 
      40             : public:
                         /// Constructs the pass and runs its initializer against the global
                         /// PassRegistry.
      41        1744 :   SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
      42        1744 :     initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
      43        1744 :   }
      44             : 
                         /// Runs the optimization on \p MF. Returns true if any instruction
                         /// was erased.
      45             :   bool runOnMachineFunction(MachineFunction &MF) override;
      46             : 
                         /// Human-readable pass name.
      47        1744 :   StringRef getPassName() const override {
      48        1744 :     return "SI optimize exec mask operations pre-RA";
      49             :   }
      50             : 
                         /// Requires LiveIntervals and declares every analysis preserved.
                         /// NOTE(review): the pass erases instructions; runOnMachineFunction
                         /// patches LiveIntervals by hand, which is presumably what justifies
                         /// setPreservesAll() here -- confirm for the other analyses.
      51        1744 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      52             :     AU.addRequired<LiveIntervals>();
      53             :     AU.setPreservesAll();
      54        1744 :     MachineFunctionPass::getAnalysisUsage(AU);
      55        1744 :   }
      56             : };
      57             : 
      58             : } // End anonymous namespace.
      59             : 
      60       73254 : INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
      61             :                       "SI optimize exec mask operations pre-RA", false, false)
                       // Pass registration: the BEGIN/DEPENDENCY/END sequence registers the pass
                       // under DEBUG_TYPE ("si-optimize-exec-masking-pre-ra") with the same
                       // description string in BEGIN and END, and names LiveIntervals as a
                       // dependency. Presumably this is what provides the
                       // initializeSIOptimizeExecMaskingPreRAPass() called by the constructor --
                       // confirm against the macro definitions.
      62       73254 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
      63      346058 : INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
      64             :                     "SI optimize exec mask operations pre-RA", false, false)
      65             : 
      66             : char SIOptimizeExecMaskingPreRA::ID = 0; // Definition of the static member declared in the class.
      67             : 
                       // Reference in the llvm namespace bound to the pass ID. The class itself
                       // lives in an anonymous namespace, so this is presumably how code outside
                       // this file names the pass -- confirm against users of this symbol.
      68             : char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
      69             : 
      70           0 : FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
                         // Factory: returns a newly heap-allocated instance of the pass as a raw
                         // FunctionPass pointer; who deletes it is not visible here (presumably
                         // the pass manager it is added to -- confirm).
                         // NOTE(review): hit count is 0, so this factory was never called in the
                         // test run this report was generated from.
      71           0 :   return new SIOptimizeExecMaskingPreRA();
      72             : }
      73             : 
      74        1180 : static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
                         // Returns true when \p MI is an S_OR_B64 that modifies EXEC. Per the file
                         // header, that is the lowered form of SI_END_CF this pass looks for.
      75        2468 :   return MI.getOpcode() == AMDGPU::S_OR_B64 &&
      76        1180 :          MI.modifiesRegister(AMDGPU::EXEC, TRI);
      77             : }
      78             : 
      79             : static bool isFullExecCopy(const MachineInstr& MI) {
                         // Returns true when \p MI is a full copy (isFullCopy) whose operand 1,
                         // presumably the copy source, is EXEC.
      80           7 :   return MI.isFullCopy() && MI.getOperand(1).getReg() == AMDGPU::EXEC;
      81             : }
      82             : 
      83          13 : static unsigned getOrNonExecReg(const MachineInstr &MI,
      84             :                                 const SIInstrInfo &TII) {
                         // Returns the register of the first named source operand of \p MI,
                         // checking src1 before src0, that is a register other than EXEC.
                         // Returns AMDGPU::NoRegister if neither operand qualifies.
                         //
                         // NOTE(review): the getNamedOperand results are dereferenced without a
                         // null check, so \p MI is assumed to have both src0 and src1. Every call
                         // in this file passes an instruction that already satisfied isEndCF
                         // (an S_OR_B64).
      85             :   auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
      86          13 :   if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
      87             :      return Op->getReg();
                         // Fallback to src0. NOTE(review): 0 hits below -- this path was never
                         // exercised in the test run this report was generated from.
      88             :   Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
      89           0 :   if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
      90             :      return Op->getReg();
      91             :   return AMDGPU::NoRegister;
      92             : }
      93             : 
      94           9 : static MachineInstr* getOrExecSource(const MachineInstr &MI,
      95             :                                      const SIInstrInfo &TII,
      96             :                                      const MachineRegisterInfo &MRI) {
                         // Finds the instruction that produced the non-EXEC source of \p MI.
                         // Returns the unique definition of that register if it is a full copy
                         // from EXEC (isFullExecCopy). Returns nullptr if \p MI has no non-EXEC
                         // register source, the register has no unique def, or the def is not
                         // such a copy.
      97           9 :   auto SavedExec = getOrNonExecReg(MI, TII);
      98           9 :   if (SavedExec == AMDGPU::NoRegister)
      99             :     return nullptr;
                         // NOTE(review): getUniqueVRegDef suggests SavedExec is expected to be a
                         // virtual register at this point (pass runs pre-RA) -- confirm.
     100           9 :   auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
     101           9 :   if (!SaveExecInst || !isFullExecCopy(*SaveExecInst))
     102             :     return nullptr;
     103             :   return SaveExecInst;
     104             : }
     105             : 
     106       17710 : bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
                         // Applies two independent clean-ups to every block of \p MF:
                         //  1. In a block with no successors that ends in S_ENDPGM, walk backwards
                         //     and erase instructions until one that may store, is a barrier or a
                         //     call, or has unmodeled side effects / ordered memory references.
                         //     If a block is fully consumed, continue into its predecessors that
                         //     have exactly one successor.
                         //  2. In a block that starts with an end-cf S_OR_B64 (isEndCF) and falls
                         //     through to a block starting with another one, erase the first
                         //     S_OR_B64 and, when safe, the EXEC copy that fed it.
                         // LiveIntervals is updated by hand for everything that is erased.
                         // Returns true if any instruction was erased.
     107       17710 :   if (skipFunction(MF.getFunction()))
     108             :     return false;
     109             : 
     110       17708 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     111             :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     112             :   const SIInstrInfo *TII = ST.getInstrInfo();
     113       17708 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     114       17708 :   LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
                         // Registers whose live-interval information is refreshed at the end if
                         // anything changed. EXEC_LO/EXEC_HI are always included; operands of
                         // erased instructions are added as they are removed.
     115             :   DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
     116             :   bool Changed = false;
     117             : 
     118       37602 :   for (MachineBasicBlock &MBB : MF) {
     119             : 
     120             :     // Try to remove unneeded instructions before s_endpgm.
     121       19894 :     if (MBB.succ_empty()) {
     122       53139 :       if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
     123        2250 :         continue;
     124             : 
                             // Worklist of blocks to scan bottom-up, seeded with the S_ENDPGM block.
     125       30966 :       SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
     126             : 
     127       31240 :       while (!Blocks.empty()) {
     128             :         auto CurBB = Blocks.pop_back_val();
     129       15757 :         auto I = CurBB->rbegin(), E = CurBB->rend();
                               // Step over a trailing unconditional branch or S_ENDPGM; a block that
                               // ends in any other branch is left alone (continue = next worklist
                               // entry).
     130       15757 :         if (I != E) {
     131       47136 :           if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
     132             :             ++I;
     133         212 :           else if (I->isBranch())
     134       15020 :             continue;
     135             :         }
     136             : 
     137       15878 :         while (I != E) {
                                 // Debug instructions are skipped, not erased.
     138           1 :           if (I->isDebugInstr()) {
     139           1 :             I = std::next(I);
     140             :             continue;
     141             :           }
     142             : 
                                 // Stop at the first instruction (scanning upwards) that must be kept.
     143       17593 :           if (I->mayStore() || I->isBarrier() || I->isCall() ||
     144       16125 :               I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
     145             :             break;
     146             : 
     147             :           LLVM_DEBUG(dbgs()
     148             :                      << "Removing no effect instruction: " << *I << '\n');
     149             : 
                                 // Remember every register the instruction touched so its liveness is
                                 // refreshed after the function has been processed.
     150        1048 :           for (auto &Op : I->operands()) {
     151         464 :             if (Op.isReg())
     152         870 :               RecalcRegs.insert(Op.getReg());
     153             :           }
     154             : 
                                 // Fetch the following position before erasing, then resume from it.
     155             :           auto Next = std::next(I);
     156         120 :           LIS->RemoveMachineInstrFromMaps(*I);
     157         120 :           I->eraseFromParent();
     158         120 :           I = Next;
     159             : 
     160             :           Changed = true;
     161             :         }
     162             : 
                               // The scan stopped at an instruction that must stay: do not ascend.
     163       15757 :         if (I != E)
     164       15020 :           continue;
     165             : 
     166             :         // Try to ascend predecessors.
                               // Only predecessors with exactly one successor are queued.
     167        1235 :         for (auto *Pred : CurBB->predecessors()) {
     168         498 :           if (Pred->succ_size() == 1)
     169         274 :             Blocks.push_back(Pred);
     170             :         }
     171             :       }
     172             :       continue;
     173             :     }
     174             : 
     175             :     // Try to collapse adjacent endifs.
                           // Candidate: a block with exactly one successor whose first instruction
                           // is an end-cf S_OR_B64.
     176             :     auto Lead = MBB.begin(), E = MBB.end();
     177        4219 :     if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
     178        2058 :       continue;
     179             : 
                           // The single successor must also be the layout successor.
     180         103 :     const MachineBasicBlock* Succ = *MBB.succ_begin();
     181         140 :     if (!MBB.isLayoutSuccessor(Succ))
     182          37 :       continue;
     183             : 
     184          66 :     auto I = std::next(Lead);
     185             : 
                           // Everything after Lead in this block must be a SALU instruction that
                           // does not read EXEC; otherwise give up on this block.
     186         195 :     for ( ; I != E; ++I)
     187         314 :       if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
     188             :         break;
     189             : 
     190          66 :     if (I != E)
     191          55 :       continue;
     192             : 
                           // The successor must itself start with an end-cf S_OR_B64 whose non-EXEC
                           // source is a full copy of EXEC (getOrExecSource returns non-null).
     193             :     const auto NextLead = Succ->begin();
     194          23 :     if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
     195           5 :         !getOrExecSource(*NextLead, *TII, MRI))
     196           7 :       continue;
     197             : 
     198             :     LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
     199             : 
                           // Capture what is needed from Lead before it is erased: the EXEC copy
                           // feeding it (may be null), its non-EXEC source register, and all of its
                           // register operands for the final liveness refresh.
     200           4 :     auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
     201           4 :     unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
     202          36 :     for (auto &Op : Lead->operands()) {
     203          16 :       if (Op.isReg())
     204          32 :         RecalcRegs.insert(Op.getReg());
     205             :     }
     206             : 
     207           4 :     LIS->RemoveMachineInstrFromMaps(*Lead);
     208           4 :     Lead->eraseFromParent();
                           // Rebuild the interval of the saved-exec register now that one of its
                           // uses is gone. NOTE(review): createAndComputeVirtRegInterval implies
                           // SaveExecReg is expected to be a virtual register -- confirm.
     209           4 :     if (SaveExecReg) {
     210           4 :       LIS->removeInterval(SaveExecReg);
     211             :       LIS->createAndComputeVirtRegInterval(SaveExecReg);
     212             :     }
     213             : 
     214             :     Changed = true;
     215             : 
     216             :     // If the only use of saved exec in the removed instruction is S_AND_B64
     217             :     // fold the copy now.
     218           5 :     if (!SaveExec || !SaveExec->isFullCopy())
     219           1 :       continue;
     220             : 
                           // The copy is folded only if every remaining non-debug user of its
                           // result is in the same block as the copy. The debug message below is
                           // inside the loop, so it prints once per such user.
     221           3 :     unsigned SavedExec = SaveExec->getOperand(0).getReg();
     222             :     bool SafeToReplace = true;
     223          12 :     for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
     224           3 :       if (U.getParent() != SaveExec->getParent()) {
     225             :         SafeToReplace = false;
     226             :         break;
     227             :       }
     228             : 
     229             :       LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
     230             :     }
     231             : 
                           // Erase the copy, rewrite all references to its result to EXEC, and drop
                           // the now-unused register's interval.
     232           3 :     if (SafeToReplace) {
     233           3 :       LIS->RemoveMachineInstrFromMaps(*SaveExec);
     234           3 :       SaveExec->eraseFromParent();
     235           3 :       MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
     236           3 :       LIS->removeInterval(SavedExec);
     237             :     }
     238             :   }
     239             : 
                         // Refresh liveness for every register touched by an erased instruction:
                         // virtual registers get their interval dropped and, if they are still
                         // referenced, recomputed; for other registers the cached register-unit
                         // ranges are removed.
     240       17708 :   if (Changed) {
     241         616 :     for (auto Reg : RecalcRegs) {
     242         518 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     243         114 :         LIS->removeInterval(Reg);
     244         114 :         if (!MRI.reg_empty(Reg))
     245             :           LIS->createAndComputeVirtRegInterval(Reg);
     246             :       } else {
     247        1323 :         for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
     248         515 :           LIS->removeRegUnit(*U);
     249             :       }
     250             :     }
     251             :   }
     252             : 
     253             :   return Changed;
     254             : }

Generated by: LCOV version 1.13