LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIOptimizeExecMaskingPreRA.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 91 94 96.8 %
Date: 2018-10-20 13:21:21 Functions: 9 10 90.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass removes redundant S_OR_B64 instructions that enable lanes in
      12             : /// exec. If two SI_END_CF (lowered as S_OR_B64) come together without any
      13             : /// vector instructions between them, we can keep only the outer SI_END_CF,
      14             : /// given that the CFG is structured and the exec bits of the outer end
      15             : /// statement are always not less than the exec bits of the inner one.
      16             : ///
      17             : /// This needs to be done before RA to eliminate the saved exec bits registers,
      18             : /// but after the register coalescer so that no vector register copies remain
      19             : /// in between different end cf statements.
      20             : ///
      21             : //===----------------------------------------------------------------------===//
      22             : 
      23             : #include "AMDGPU.h"
      24             : #include "AMDGPUSubtarget.h"
      25             : #include "SIInstrInfo.h"
      26             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      27             : #include "llvm/CodeGen/LiveIntervals.h"
      28             : #include "llvm/CodeGen/MachineFunctionPass.h"
      29             : 
      30             : using namespace llvm;
      31             : 
      32             : #define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
      33             : 
      34             : namespace {
      35             : 
      36             : class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
      37             : public:
      38             :   static char ID;
      39             : 
      40             : public:
      41        1912 :   SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
      42        1912 :     initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
      43        1912 :   }
      44             : 
      45             :   bool runOnMachineFunction(MachineFunction &MF) override;
      46             : 
      47        1912 :   StringRef getPassName() const override {
      48        1912 :     return "SI optimize exec mask operations pre-RA";
      49             :   }
      50             : 
      51        1912 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      52             :     AU.addRequired<LiveIntervals>();
      53             :     AU.setPreservesAll();
      54        1912 :     MachineFunctionPass::getAnalysisUsage(AU);
      55        1912 :   }
      56             : };
      57             : 
      58             : } // End anonymous namespace.
      59             : 
      60       85105 : INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
      61             :                       "SI optimize exec mask operations pre-RA", false, false)
      62       85105 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
      63      200936 : INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
      64             :                     "SI optimize exec mask operations pre-RA", false, false)
      65             : 
      66             : char SIOptimizeExecMaskingPreRA::ID = 0;
      67             : 
      68             : char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
      69             : 
      70           0 : FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
      71           0 :   return new SIOptimizeExecMaskingPreRA();
      72             : }
      73             : 
      74        1317 : static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
      75        2743 :   return MI.getOpcode() == AMDGPU::S_OR_B64 &&
      76        1317 :          MI.modifiesRegister(AMDGPU::EXEC, TRI);
      77             : }
      78             : 
      79             : static bool isFullExecCopy(const MachineInstr& MI) {
      80           7 :   return MI.isFullCopy() && MI.getOperand(1).getReg() == AMDGPU::EXEC;
      81             : }
      82             : 
      83          13 : static unsigned getOrNonExecReg(const MachineInstr &MI,
      84             :                                 const SIInstrInfo &TII) {
      85             :   auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
      86          13 :   if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
      87             :      return Op->getReg();
      88             :   Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
      89           0 :   if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
      90             :      return Op->getReg();
      91             :   return AMDGPU::NoRegister;
      92             : }
      93             : 
      94           9 : static MachineInstr* getOrExecSource(const MachineInstr &MI,
      95             :                                      const SIInstrInfo &TII,
      96             :                                      const MachineRegisterInfo &MRI) {
      97           9 :   auto SavedExec = getOrNonExecReg(MI, TII);
      98           9 :   if (SavedExec == AMDGPU::NoRegister)
      99             :     return nullptr;
     100           9 :   auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
     101           9 :   if (!SaveExecInst || !isFullExecCopy(*SaveExecInst))
     102           2 :     return nullptr;
     103             :   return SaveExecInst;
     104             : }
     105             : 
     106       19561 : bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
     107       19561 :   if (skipFunction(MF.getFunction()))
     108             :     return false;
     109             : 
     110       19558 :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     111       19558 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     112       19558 :   const SIInstrInfo *TII = ST.getInstrInfo();
     113       19558 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     114       19558 :   LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
     115       19558 :   DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
     116             :   bool Changed = false;
     117             : 
     118       41575 :   for (MachineBasicBlock &MBB : MF) {
     119             : 
     120             :     // Try to remove unneeded instructions before s_endpgm.
     121       22017 :     if (MBB.succ_empty()) {
     122       19583 :       if (MBB.empty())
     123             :         continue;
     124             : 
     125             :       // Skip this if the endpgm has any implicit uses, otherwise we would need
     126             :       // to be careful to update / remove them.
     127             :       MachineInstr &Term = MBB.back();
     128       39106 :       if (Term.getOpcode() != AMDGPU::S_ENDPGM ||
     129       16669 :           Term.getNumOperands() != 0)
     130             :         continue;
     131             : 
     132       16668 :       SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
     133             : 
     134       33620 :       while (!Blocks.empty()) {
     135             :         auto CurBB = Blocks.pop_back_val();
     136             :         auto I = CurBB->rbegin(), E = CurBB->rend();
     137       16952 :         if (I != E) {
     138       16941 :           if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
     139             :             ++I;
     140         223 :           else if (I->isBranch())
     141             :             continue;
     142             :         }
     143             : 
     144       17081 :         while (I != E) {
     145             :           if (I->isDebugInstr()) {
     146           1 :             I = std::next(I);
     147             :             continue;
     148             :           }
     149             : 
     150       18994 :           if (I->mayStore() || I->isBarrier() || I->isCall() ||
     151       17364 :               I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
     152             :             break;
     153             : 
     154             :           LLVM_DEBUG(dbgs()
     155             :                      << "Removing no effect instruction: " << *I << '\n');
     156             : 
     157         626 :           for (auto &Op : I->operands()) {
     158         498 :             if (Op.isReg())
     159         471 :               RecalcRegs.insert(Op.getReg());
     160             :           }
     161             : 
     162         128 :           auto Next = std::next(I);
     163         128 :           LIS->RemoveMachineInstrFromMaps(*I);
     164         128 :           I->eraseFromParent();
     165             :           I = Next;
     166             : 
     167             :           Changed = true;
     168             :         }
     169             : 
     170       16952 :         if (I != E)
     171             :           continue;
     172             : 
     173             :         // Try to ascend predecessors.
     174        1312 :         for (auto *Pred : CurBB->predecessors()) {
     175         518 :           if (Pred->succ_size() == 1)
     176         284 :             Blocks.push_back(Pred);
     177             :         }
     178             :       }
     179             :       continue;
     180             :     }
     181             : 
     182             :     // Try to collapse adjacent endifs.
     183             :     auto Lead = MBB.begin(), E = MBB.end();
     184        2434 :     if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
     185        2330 :       continue;
     186             : 
     187         104 :     const MachineBasicBlock* Succ = *MBB.succ_begin();
     188         104 :     if (!MBB.isLayoutSuccessor(Succ))
     189             :       continue;
     190             : 
     191          67 :     auto I = std::next(Lead);
     192             : 
     193         142 :     for ( ; I != E; ++I)
     194         207 :       if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
     195             :         break;
     196             : 
     197          67 :     if (I != E)
     198             :       continue;
     199             : 
     200             :     const auto NextLead = Succ->begin();
     201          16 :     if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
     202           5 :         !getOrExecSource(*NextLead, *TII, MRI))
     203           7 :       continue;
     204             : 
     205             :     LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
     206             : 
     207           4 :     auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
     208           4 :     unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
     209          20 :     for (auto &Op : Lead->operands()) {
     210          16 :       if (Op.isReg())
     211          16 :         RecalcRegs.insert(Op.getReg());
     212             :     }
     213             : 
     214           4 :     LIS->RemoveMachineInstrFromMaps(*Lead);
     215           4 :     Lead->eraseFromParent();
     216           4 :     if (SaveExecReg) {
     217           4 :       LIS->removeInterval(SaveExecReg);
     218             :       LIS->createAndComputeVirtRegInterval(SaveExecReg);
     219             :     }
     220             : 
     221             :     Changed = true;
     222             : 
     223             :     // If the only use of saved exec in the removed instruction is S_AND_B64,
     224             :     // fold the copy now.
     225           4 :     if (!SaveExec || !SaveExec->isFullCopy())
     226             :       continue;
     227             : 
     228           3 :     unsigned SavedExec = SaveExec->getOperand(0).getReg();
     229             :     bool SafeToReplace = true;
     230           6 :     for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
     231           3 :       if (U.getParent() != SaveExec->getParent()) {
     232             :         SafeToReplace = false;
     233             :         break;
     234             :       }
     235             : 
     236             :       LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
     237             :     }
     238             : 
     239           3 :     if (SafeToReplace) {
     240           3 :       LIS->RemoveMachineInstrFromMaps(*SaveExec);
     241           3 :       SaveExec->eraseFromParent();
     242           3 :       MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
     243           3 :       LIS->removeInterval(SavedExec);
     244             :     }
     245             :   }
     246             : 
     247       19558 :   if (Changed) {
     248         670 :     for (auto Reg : RecalcRegs) {
     249         563 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     250         122 :         LIS->removeInterval(Reg);
     251         122 :         if (!MRI.reg_empty(Reg))
     252             :           LIS->createAndComputeVirtRegInterval(Reg);
     253             :       } else {
     254        1443 :         for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
     255         561 :           LIS->removeRegUnit(*U);
     256             :       }
     257             :     }
     258             :   }
     259             : 
     260             :   return Changed;
     261             : }

Generated by: LCOV version 1.13