LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SILowerControlFlow.cpp (source / functions)
Test:         llvm-toolchain.info
Date:         2017-09-14 15:23:50
Coverage:     Lines: 203 hit / 234 total (86.8 %)    Functions: 14 hit / 15 total (93.3 %)
Legend:       Lines: hit | not hit

          Line data    Source code
       1             : //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief This pass lowers the pseudo control flow instructions to real
      12             : /// machine instructions.
      13             : ///
      14             : /// All control flow is handled using predicated instructions and
      15             : /// a predicate stack.  Each Scalar ALU controls the operations of 64 Vector
      16             : /// ALUs.  The Scalar ALU can update the predicate for any of the Vector ALUs
       17             : /// by writing to the 64-bit EXEC register (each bit corresponds to a
       18             : /// single vector ALU).  Typically, for predicates, a vector ALU will write
       19             : /// to its bit of the VCC register (like EXEC, VCC is 64 bits, one for each
       20             : /// Vector ALU) and then the Scalar ALU will AND the VCC register with
       21             : /// EXEC to update the predicates.
      22             : ///
      23             : /// For example:
      24             : /// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
      25             : /// %SGPR0 = SI_IF %VCC
      26             : ///   %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
      27             : /// %SGPR0 = SI_ELSE %SGPR0
      28             : ///   %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
      29             : /// SI_END_CF %SGPR0
      30             : ///
      31             : /// becomes:
      32             : ///
      33             : /// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC  // Save and update the exec mask
      34             : /// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC  // Clear live bits from saved exec mask
      35             : /// S_CBRANCH_EXECZ label0            // This instruction is an optional
      36             : ///                                   // optimization which allows us to
      37             : ///                                   // branch if all the bits of
      38             : ///                                   // EXEC are zero.
      39             : /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
      40             : ///
      41             : /// label0:
      42             : /// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC   // Restore the exec mask for the Then block
      43             : /// %EXEC = S_XOR_B64 %SGPR0, %EXEC    // Clear live bits from saved exec mask
       44             : /// S_CBRANCH_EXECZ label1             // Use our branch optimization
      45             : ///                                    // instruction again.
       46             : /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0  // Do the ELSE block
      47             : /// label1:
      48             : /// %EXEC = S_OR_B64 %EXEC, %SGPR0     // Re-enable saved exec mask bits
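                     : ///
                     : /// In mask terms (an editorial sketch, not part of the original comment,
                     : /// with E and C as informal symbols): given the incoming exec mask E and the
                     : /// condition mask C, the IF block executes with EXEC = E & C, the saved
                     : /// register holds E & ~C for the ELSE block, and SI_END_CF ORs the saved
                     : /// bits back so that EXEC = E again.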
      49             : //===----------------------------------------------------------------------===//
      50             : 
      51             : #include "AMDGPU.h"
      52             : #include "AMDGPUSubtarget.h"
      53             : #include "SIInstrInfo.h"
      54             : #include "llvm/ADT/SmallVector.h"
      55             : #include "llvm/ADT/StringRef.h"
      56             : #include "llvm/CodeGen/LiveIntervalAnalysis.h"
      57             : #include "llvm/CodeGen/MachineBasicBlock.h"
      58             : #include "llvm/CodeGen/MachineFunction.h"
      59             : #include "llvm/CodeGen/MachineFunctionPass.h"
      60             : #include "llvm/CodeGen/MachineInstr.h"
      61             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      62             : #include "llvm/CodeGen/MachineOperand.h"
      63             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      64             : #include "llvm/CodeGen/Passes.h"
      65             : #include "llvm/CodeGen/SlotIndexes.h"
      66             : #include "llvm/MC/MCRegisterInfo.h"
      67             : #include "llvm/Pass.h"
      68             : #include "llvm/Target/TargetRegisterInfo.h"
      69             : #include <cassert>
      70             : #include <iterator>
      71             : 
      72             : using namespace llvm;
      73             : 
      74             : #define DEBUG_TYPE "si-lower-control-flow"
      75             : 
      76             : namespace {
      77             : 
      78        1454 : class SILowerControlFlow : public MachineFunctionPass {
      79             : private:
      80             :   const SIRegisterInfo *TRI = nullptr;
      81             :   const SIInstrInfo *TII = nullptr;
      82             :   LiveIntervals *LIS = nullptr;
      83             :   MachineRegisterInfo *MRI = nullptr;
      84             : 
      85             :   void emitIf(MachineInstr &MI);
      86             :   void emitElse(MachineInstr &MI);
      87             :   void emitBreak(MachineInstr &MI);
      88             :   void emitIfBreak(MachineInstr &MI);
      89             :   void emitElseBreak(MachineInstr &MI);
      90             :   void emitLoop(MachineInstr &MI);
      91             :   void emitEndCf(MachineInstr &MI);
      92             : 
      93             :   void findMaskOperands(MachineInstr &MI, unsigned OpNo,
      94             :                         SmallVectorImpl<MachineOperand> &Src) const;
      95             : 
      96             :   void combineMasks(MachineInstr &MI);
      97             : 
      98             : public:
      99             :   static char ID;
     100             : 
     101        1462 :   SILowerControlFlow() : MachineFunctionPass(ID) {}
     102             : 
     103             :   bool runOnMachineFunction(MachineFunction &MF) override;
     104             : 
     105        1462 :   StringRef getPassName() const override {
     106        1462 :     return "SI Lower control flow pseudo instructions";
     107             :   }
     108             : 
     109        1462 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     110             :     // Should preserve the same set that TwoAddressInstructions does.
     111        1462 :     AU.addPreserved<SlotIndexes>();
     112        1462 :     AU.addPreserved<LiveIntervals>();
     113        2924 :     AU.addPreservedID(LiveVariablesID);
     114        2924 :     AU.addPreservedID(MachineLoopInfoID);
     115        2924 :     AU.addPreservedID(MachineDominatorsID);
     116        1462 :     AU.setPreservesCFG();
     117        1462 :     MachineFunctionPass::getAnalysisUsage(AU);
     118        1462 :   }
     119             : };
     120             : 
     121             : } // end anonymous namespace
     122             : 
     123             : char SILowerControlFlow::ID = 0;
     124             : 
     125      312538 : INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
     126             :                "SI lower control flow", false, false)
     127             : 
     128             : static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
     129         406 :   MachineOperand &ImpDefSCC = MI.getOperand(3);
     130             :   assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
     131             : 
     132         812 :   ImpDefSCC.setIsDead(IsDead);
     133             : }
     134             : 
     135             : char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
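                     : 
                     : // Editorial note: this exported ID is how a target pass pipeline requests the
                     : // pass. A minimal, hypothetical sketch (the class and hook names below are
                     : // illustrative assumptions, not taken from this file):
                     : //
                     : //   void GCNPassConfig::addControlFlowLowering() {   // hypothetical hook
                     : //     addPass(&SILowerControlFlowID);  // TargetPassConfig::addPass(AnalysisID)
                     : //   }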
     136             : 
     137         323 : static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
     138         323 :   unsigned SaveExecReg = MI.getOperand(0).getReg();
     139         323 :   auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
     140             : 
     141         643 :   if (U == MRI->use_instr_nodbg_end() ||
     142        1265 :       std::next(U) != MRI->use_instr_nodbg_end() ||
     143         302 :       U->getOpcode() != AMDGPU::SI_END_CF)
     144             :     return false;
     145             : 
      146             :   // Check for SI_KILL_TERMINATOR on the path from the if to the endif.
      147             :   // If there is any such terminator, simplifications are not safe.
     148         245 :   auto SMBB = MI.getParent();
     149         245 :   auto EMBB = U->getParent();
     150         245 :   DenseSet<const MachineBasicBlock*> Visited;
     151             :   SmallVector<MachineBasicBlock*, 4> Worklist(SMBB->succ_begin(),
     152         735 :                                               SMBB->succ_end());
     153             : 
     154        1046 :   while (!Worklist.empty()) {
     155         806 :     MachineBasicBlock *MBB = Worklist.pop_back_val();
     156             : 
     157        1638 :     if (MBB == EMBB || !Visited.insert(MBB).second)
     158         513 :       continue;
     159         805 :     for(auto &Term : MBB->terminators())
     160         112 :       if (Term.getOpcode() == AMDGPU::SI_KILL_TERMINATOR)
     161           5 :         return false;
     162             : 
     163         576 :     Worklist.append(MBB->succ_begin(), MBB->succ_end());
     164             :   }
     165             : 
     166             :   return true;
     167             : }
     168             : 
     169         323 : void SILowerControlFlow::emitIf(MachineInstr &MI) {
     170         323 :   MachineBasicBlock &MBB = *MI.getParent();
     171         323 :   const DebugLoc &DL = MI.getDebugLoc();
     172         323 :   MachineBasicBlock::iterator I(&MI);
     173             : 
     174         323 :   MachineOperand &SaveExec = MI.getOperand(0);
     175         323 :   MachineOperand &Cond = MI.getOperand(1);
     176             :   assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
     177             :          Cond.getSubReg() == AMDGPU::NoSubRegister);
     178             : 
     179         323 :   unsigned SaveExecReg = SaveExec.getReg();
     180             : 
     181         323 :   MachineOperand &ImpDefSCC = MI.getOperand(4);
     182             :   assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
     183             : 
     184             :   // If there is only one use of save exec register and that use is SI_END_CF,
     185             :   // we can optimize SI_IF by returning the full saved exec mask instead of
     186             :   // just cleared bits.
     187         323 :   bool SimpleIf = isSimpleIf(MI, MRI);
     188             : 
     189             :   // Add an implicit def of exec to discourage scheduling VALU after this which
     190             :   // will interfere with trying to form s_and_saveexec_b64 later.
     191         406 :   unsigned CopyReg = SimpleIf ? SaveExecReg
     192         406 :                        : MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
     193             :   MachineInstr *CopyExec =
     194         969 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
     195         323 :     .addReg(AMDGPU::EXEC)
     196         323 :     .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
     197             : 
     198         323 :   unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
     199             : 
     200             :   MachineInstr *And =
     201         969 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
     202         323 :     .addReg(CopyReg)
     203             :     //.addReg(AMDGPU::EXEC)
     204         323 :     .addReg(Cond.getReg());
     205         323 :   setImpSCCDefDead(*And, true);
     206             : 
     207         323 :   MachineInstr *Xor = nullptr;
     208         323 :   if (!SimpleIf) {
     209          83 :     Xor =
     210         249 :       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
     211          83 :       .addReg(Tmp)
     212          83 :       .addReg(CopyReg);
     213          83 :     setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
     214             :   }
     215             : 
     216             :   // Use a copy that is a terminator to get correct spill code placement it with
     217             :   // fast regalloc.
     218             :   MachineInstr *SetExec =
     219         969 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
     220         323 :     .addReg(Tmp, RegState::Kill);
     221             : 
     222             :   // Insert a pseudo terminator to help keep the verifier happy. This will also
     223             :   // be used later when inserting skips.
     224         969 :   MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
     225         969 :                             .add(MI.getOperand(2));
     226             : 
     227         323 :   if (!LIS) {
     228         323 :     MI.eraseFromParent();
     229         323 :     return;
     230             :   }
     231             : 
     232           0 :   LIS->InsertMachineInstrInMaps(*CopyExec);
     233             : 
      234             :   // Replace MI with the And so we don't need to fix the live interval for the
      235             :   // condition register.
     236           0 :   LIS->ReplaceMachineInstrInMaps(MI, *And);
     237             : 
     238           0 :   if (!SimpleIf)
     239           0 :     LIS->InsertMachineInstrInMaps(*Xor);
     240           0 :   LIS->InsertMachineInstrInMaps(*SetExec);
     241           0 :   LIS->InsertMachineInstrInMaps(*NewBr);
     242             : 
     243           0 :   LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
     244           0 :   MI.eraseFromParent();
     245             : 
     246             :   // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
     247             :   // hard to add another def here but I'm not sure how to correctly update the
     248             :   // valno.
     249           0 :   LIS->removeInterval(SaveExecReg);
     250           0 :   LIS->createAndComputeVirtRegInterval(SaveExecReg);
     251           0 :   LIS->createAndComputeVirtRegInterval(Tmp);
     252           0 :   if (!SimpleIf)
     253           0 :     LIS->createAndComputeVirtRegInterval(CopyReg);
     254             : }
     255             : 
     256          45 : void SILowerControlFlow::emitElse(MachineInstr &MI) {
     257          45 :   MachineBasicBlock &MBB = *MI.getParent();
     258          45 :   const DebugLoc &DL = MI.getDebugLoc();
     259             : 
     260          45 :   unsigned DstReg = MI.getOperand(0).getReg();
     261             :   assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
     262             : 
     263          45 :   bool ExecModified = MI.getOperand(3).getImm() != 0;
     264          45 :   MachineBasicBlock::iterator Start = MBB.begin();
     265             : 
     266             :   // We are running before TwoAddressInstructions, and si_else's operands are
     267             :   // tied. In order to correctly tie the registers, split this into a copy of
      268             :   // the source, as TwoAddressInstructions would do.
     269          45 :   unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
     270             :   MachineInstr *CopyExec =
     271         135 :     BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
     272         135 :       .add(MI.getOperand(1)); // Saved EXEC
     273             : 
     274             :   // This must be inserted before phis and any spill code inserted before the
     275             :   // else.
     276          51 :   unsigned SaveReg = ExecModified ?
     277          51 :     MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg;
     278             :   MachineInstr *OrSaveExec =
     279         135 :     BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg)
     280          45 :     .addReg(CopyReg);
     281             : 
     282          45 :   MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
     283             : 
     284          45 :   MachineBasicBlock::iterator ElsePt(MI);
     285             : 
     286          45 :   if (ExecModified) {
     287             :     MachineInstr *And =
     288          18 :       BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
     289           6 :       .addReg(AMDGPU::EXEC)
     290           6 :       .addReg(SaveReg);
     291             : 
     292           6 :     if (LIS)
     293           0 :       LIS->InsertMachineInstrInMaps(*And);
     294             :   }
     295             : 
     296             :   MachineInstr *Xor =
     297         135 :     BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
     298          45 :     .addReg(AMDGPU::EXEC)
     299          45 :     .addReg(DstReg);
     300             : 
     301             :   MachineInstr *Branch =
     302         135 :     BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
     303          45 :     .addMBB(DestBB);
     304             : 
     305          45 :   if (!LIS) {
     306          45 :     MI.eraseFromParent();
     307          45 :     return;
     308             :   }
     309             : 
     310           0 :   LIS->RemoveMachineInstrFromMaps(MI);
     311           0 :   MI.eraseFromParent();
     312             : 
     313           0 :   LIS->InsertMachineInstrInMaps(*CopyExec);
     314           0 :   LIS->InsertMachineInstrInMaps(*OrSaveExec);
     315             : 
     316           0 :   LIS->InsertMachineInstrInMaps(*Xor);
     317           0 :   LIS->InsertMachineInstrInMaps(*Branch);
     318             : 
     319             :   // src reg is tied to dst reg.
     320           0 :   LIS->removeInterval(DstReg);
     321           0 :   LIS->createAndComputeVirtRegInterval(DstReg);
     322           0 :   LIS->createAndComputeVirtRegInterval(CopyReg);
     323           0 :   if (ExecModified)
     324           0 :     LIS->createAndComputeVirtRegInterval(SaveReg);
     325             : 
     326             :   // Let this be recomputed.
     327           0 :   LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
     328             : }
     329             : 
     330           4 : void SILowerControlFlow::emitBreak(MachineInstr &MI) {
     331           4 :   MachineBasicBlock &MBB = *MI.getParent();
     332           4 :   const DebugLoc &DL = MI.getDebugLoc();
     333           4 :   unsigned Dst = MI.getOperand(0).getReg();
     334             : 
     335          16 :   MachineInstr *Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
     336           4 :                          .addReg(AMDGPU::EXEC)
     337          12 :                          .add(MI.getOperand(1));
     338             : 
     339           4 :   if (LIS)
     340           0 :     LIS->ReplaceMachineInstrInMaps(MI, *Or);
     341           4 :   MI.eraseFromParent();
     342           4 : }
     343             : 
     344             : void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
     345          93 :   MI.setDesc(TII->get(AMDGPU::S_OR_B64));
     346             : }
     347             : 
     348             : void SILowerControlFlow::emitElseBreak(MachineInstr &MI) {
     349          57 :   MI.setDesc(TII->get(AMDGPU::S_OR_B64));
     350             : }
     351             : 
     352          44 : void SILowerControlFlow::emitLoop(MachineInstr &MI) {
     353          44 :   MachineBasicBlock &MBB = *MI.getParent();
     354          44 :   const DebugLoc &DL = MI.getDebugLoc();
     355             : 
     356             :   MachineInstr *AndN2 =
     357         176 :       BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
     358          44 :           .addReg(AMDGPU::EXEC)
     359          88 :           .add(MI.getOperand(0));
     360             : 
     361             :   MachineInstr *Branch =
     362         132 :       BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
     363         132 :           .add(MI.getOperand(1));
     364             : 
     365          44 :   if (LIS) {
     366           0 :     LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
     367           0 :     LIS->InsertMachineInstrInMaps(*Branch);
     368             :   }
     369             : 
     370          44 :   MI.eraseFromParent();
     371          44 : }
     372             : 
     373         364 : void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
     374         364 :   MachineBasicBlock &MBB = *MI.getParent();
     375         364 :   const DebugLoc &DL = MI.getDebugLoc();
     376             : 
     377         364 :   MachineBasicBlock::iterator InsPt = MBB.begin();
     378             :   MachineInstr *NewMI =
     379        1092 :       BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
     380         364 :           .addReg(AMDGPU::EXEC)
     381         728 :           .add(MI.getOperand(0));
     382             : 
     383         364 :   if (LIS)
     384           0 :     LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
     385             : 
     386         364 :   MI.eraseFromParent();
     387             : 
     388         364 :   if (LIS)
     389           0 :     LIS->handleMove(*NewMI);
     390         364 : }
     391             : 
      392             : // Collects replacement operands for a logical operation: a single operand if
      393             : // the source is exec, or two operands if the source was another equivalent operation.
     394        2520 : void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
     395             :        SmallVectorImpl<MachineOperand> &Src) const {
     396        5040 :   MachineOperand &Op = MI.getOperand(OpNo);
     397        5040 :   if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
     398         633 :     Src.push_back(Op);
     399             :     return;
     400             :   }
     401             : 
     402        1887 :   MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
     403        1887 :   if (!Def || Def->getParent() != MI.getParent() ||
     404        2958 :       !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
     405             :     return;
     406             : 
     407             :   // Make sure we do not modify exec between def and use.
      408             :   // A copy with implicitly defined exec inserted earlier is an exception; it
     409             :   // does not really modify exec.
     410        2444 :   for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
     411        2323 :     if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
     412        1112 :         !(I->isCopy() && I->getOperand(0).getReg() != AMDGPU::EXEC))
     413             :       return;
     414             : 
     415        1429 :   for (const auto &SrcOp : Def->explicit_operands())
     416        2027 :     if (SrcOp.isUse() && (!SrcOp.isReg() ||
     417        1378 :         TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) ||
     418         328 :         SrcOp.getReg() == AMDGPU::EXEC))
     419         525 :       Src.push_back(SrcOp);
     420             : }
     421             : 
     422             : // Search and combine pairs of equivalent instructions, like
     423             : // S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
     424             : // S_OR_B64  x, (S_OR_B64  x, y) => S_OR_B64  x, y
      425             : // One of the operands must be the exec mask.
     426        1260 : void SILowerControlFlow::combineMasks(MachineInstr &MI) {
     427             :   assert(MI.getNumExplicitOperands() == 3);
     428        1263 :   SmallVector<MachineOperand, 4> Ops;
     429        1260 :   unsigned OpToReplace = 1;
     430        1260 :   findMaskOperands(MI, 1, Ops);
     431        1260 :   if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
     432        1260 :   findMaskOperands(MI, 2, Ops);
     433        2517 :   if (Ops.size() != 3) return;
     434             : 
     435             :   unsigned UniqueOpndIdx;
     436          16 :   if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
     437          10 :   else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
     438          10 :   else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
     439             :   else return;
     440             : 
     441           6 :   unsigned Reg = MI.getOperand(OpToReplace).getReg();
     442           3 :   MI.RemoveOperand(OpToReplace);
     443           6 :   MI.addOperand(Ops[UniqueOpndIdx]);
     444           6 :   if (MRI->use_empty(Reg))
     445           3 :     MRI->getUniqueVRegDef(Reg)->eraseFromParent();
     446             : }
     447             : 
     448       14833 : bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
     449       14833 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     450       14833 :   TII = ST.getInstrInfo();
     451       29666 :   TRI = &TII->getRegisterInfo();
     452             : 
     453             :   // This doesn't actually need LiveIntervals, but we can preserve them.
     454       14833 :   LIS = getAnalysisIfAvailable<LiveIntervals>();
     455       14833 :   MRI = &MF.getRegInfo();
     456             : 
     457       14833 :   MachineFunction::iterator NextBB;
     458       29666 :   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
     459       31758 :        BI != BE; BI = NextBB) {
     460       16925 :     NextBB = std::next(BI);
     461       16925 :     MachineBasicBlock &MBB = *BI;
     462             : 
     463       50775 :     MachineBasicBlock::iterator I, Next, Last;
     464             : 
     465      752204 :     for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
     466      342252 :       Next = std::next(I);
     467      342252 :       MachineInstr &MI = *I;
     468             : 
     469      683674 :       switch (MI.getOpcode()) {
     470         323 :       case AMDGPU::SI_IF:
     471         323 :         emitIf(MI);
     472         323 :         break;
     473             : 
     474          45 :       case AMDGPU::SI_ELSE:
     475          45 :         emitElse(MI);
     476          45 :         break;
     477             : 
     478           4 :       case AMDGPU::SI_BREAK:
     479           4 :         emitBreak(MI);
     480           4 :         break;
     481             : 
     482          31 :       case AMDGPU::SI_IF_BREAK:
     483          31 :         emitIfBreak(MI);
     484             :         break;
     485             : 
     486          19 :       case AMDGPU::SI_ELSE_BREAK:
     487          19 :         emitElseBreak(MI);
     488             :         break;
     489             : 
     490          44 :       case AMDGPU::SI_LOOP:
     491          44 :         emitLoop(MI);
     492          44 :         break;
     493             : 
     494         364 :       case AMDGPU::SI_END_CF:
     495         364 :         emitEndCf(MI);
     496         364 :         break;
     497             : 
     498        1260 :       case AMDGPU::S_AND_B64:
     499             :       case AMDGPU::S_OR_B64:
      500             :         // Clean up bit manipulations on the exec mask.
     501        1260 :         combineMasks(MI);
     502        1260 :         Last = I;
     503        1260 :         continue;
     504             : 
     505      340162 :       default:
     506      340162 :         Last = I;
     507      340162 :         continue;
     508             :       }
     509             : 
     510             :       // Replay newly inserted code to combine masks
     511        2005 :       Next = (Last == MBB.end()) ? MBB.begin() : Last;
     512             :     }
     513             :   }
     514             : 
     515       14833 :   return true;
     516             : }

Generated by: LCOV version 1.13