LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SILowerControlFlow.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 189 223 84.8 %
Date: 2018-07-13 00:08:38 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass lowers the pseudo control flow instructions to real
      12             : /// machine instructions.
      13             : ///
      14             : /// All control flow is handled using predicated instructions and
      15             : /// a predicate stack.  Each Scalar ALU controls the operations of 64 Vector
      16             : /// ALUs.  The Scalar ALU can update the predicate for any of the Vector ALUs
      17             : /// by writing to the 64-bit EXEC register (each bit corresponds to a
      18             : /// single vector ALU).  Typically, for predicates, a vector ALU will write
      19             : /// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
      20             : /// Vector ALU) and then the ScalarALU will AND the VCC register with the
      21             : /// EXEC to update the predicates.
      22             : ///
      23             : /// For example:
      24             : /// %vcc = V_CMP_GT_F32 %vgpr1, %vgpr2
      25             : /// %sgpr0 = SI_IF %vcc
      26             : ///   %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0
      27             : /// %sgpr0 = SI_ELSE %sgpr0
      28             : ///   %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0
      29             : /// SI_END_CF %sgpr0
      30             : ///
      31             : /// becomes:
      32             : ///
      33             : /// %sgpr0 = S_AND_SAVEEXEC_B64 %vcc  // Save and update the exec mask
      34             : /// %sgpr0 = S_XOR_B64 %sgpr0, %exec  // Clear live bits from saved exec mask
      35             : /// S_CBRANCH_EXECZ label0            // This instruction is an optional
      36             : ///                                   // optimization which allows us to
      37             : ///                                   // branch if all the bits of
      38             : ///                                   // EXEC are zero.
      39             : /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
      40             : ///
      41             : /// label0:
      42             : /// %sgpr0 = S_OR_SAVEEXEC_B64 %exec   // Restore the exec mask for the Then block
      43             : /// %exec = S_XOR_B64 %sgpr0, %exec    // Clear live bits from saved exec mask
      44             : /// S_CBRANCH_EXECZ label1             // Use our branch optimization
      45             : ///                                    // instruction again.
      46             : /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0  // Do the THEN block
      47             : /// label1:
      48             : /// %exec = S_OR_B64 %exec, %sgpr0     // Re-enable saved exec mask bits
      49             : //===----------------------------------------------------------------------===//
      50             : 
      51             : #include "AMDGPU.h"
      52             : #include "AMDGPUSubtarget.h"
      53             : #include "SIInstrInfo.h"
      54             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      55             : #include "llvm/ADT/SmallVector.h"
      56             : #include "llvm/ADT/StringRef.h"
      57             : #include "llvm/CodeGen/LiveIntervals.h"
      58             : #include "llvm/CodeGen/MachineBasicBlock.h"
      59             : #include "llvm/CodeGen/MachineFunction.h"
      60             : #include "llvm/CodeGen/MachineFunctionPass.h"
      61             : #include "llvm/CodeGen/MachineInstr.h"
      62             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      63             : #include "llvm/CodeGen/MachineOperand.h"
      64             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      65             : #include "llvm/CodeGen/Passes.h"
      66             : #include "llvm/CodeGen/SlotIndexes.h"
      67             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      68             : #include "llvm/MC/MCRegisterInfo.h"
      69             : #include "llvm/Pass.h"
      70             : #include <cassert>
      71             : #include <iterator>
      72             : 
      73             : using namespace llvm;
      74             : 
      75             : #define DEBUG_TYPE "si-lower-control-flow"
      76             : 
      77             : namespace {
      78             : 
      79        1777 : class SILowerControlFlow : public MachineFunctionPass {
      80             : private:
      81             :   const SIRegisterInfo *TRI = nullptr;
      82             :   const SIInstrInfo *TII = nullptr;
      83             :   LiveIntervals *LIS = nullptr;
      84             :   MachineRegisterInfo *MRI = nullptr;
      85             : 
      86             :   void emitIf(MachineInstr &MI);
      87             :   void emitElse(MachineInstr &MI);
      88             :   void emitBreak(MachineInstr &MI);
      89             :   void emitIfBreak(MachineInstr &MI);
      90             :   void emitElseBreak(MachineInstr &MI);
      91             :   void emitLoop(MachineInstr &MI);
      92             :   void emitEndCf(MachineInstr &MI);
      93             : 
      94             :   void findMaskOperands(MachineInstr &MI, unsigned OpNo,
      95             :                         SmallVectorImpl<MachineOperand> &Src) const;
      96             : 
      97             :   void combineMasks(MachineInstr &MI);
      98             : 
      99             : public:
     100             :   static char ID;
     101             : 
     102        1785 :   SILowerControlFlow() : MachineFunctionPass(ID) {}
     103             : 
     104             :   bool runOnMachineFunction(MachineFunction &MF) override;
     105             : 
     106        1785 :   StringRef getPassName() const override {
     107        1785 :     return "SI Lower control flow pseudo instructions";
     108             :   }
     109             : 
     110        1785 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     111             :     // Should preserve the same set that TwoAddressInstructions does.
     112             :     AU.addPreserved<SlotIndexes>();
     113             :     AU.addPreserved<LiveIntervals>();
     114        1785 :     AU.addPreservedID(LiveVariablesID);
     115        1785 :     AU.addPreservedID(MachineLoopInfoID);
     116        1785 :     AU.addPreservedID(MachineDominatorsID);
     117        1785 :     AU.setPreservesCFG();
     118        1785 :     MachineFunctionPass::getAnalysisUsage(AU);
     119        1785 :   }
     120             : };
     121             : 
     122             : } // end anonymous namespace
     123             : 
     124             : char SILowerControlFlow::ID = 0; // Storage for the static member passed to the MachineFunctionPass constructor.
     125             : 
     126      342570 : INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE, // DEBUG_TYPE is "si-lower-control-flow".
     127             :                "SI lower control flow", false, false) // NOTE(review): the two flags are presumably CFG-only / is-analysis - confirm against llvm/PassSupport.h.
     128             : 
     129             : static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
     130         449 :   MachineOperand &ImpDefSCC = MI.getOperand(3);
     131             :   assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
     132             : 
     133             :   ImpDefSCC.setIsDead(IsDead);
     134             : }
     135             : 
     136             : char &llvm::SILowerControlFlowID = SILowerControlFlow::ID; // Reference to the pass ID, visible in namespace llvm.
     137             : 
     138         352 : static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
     139             :                        const SIInstrInfo *TII) {
     140         352 :   unsigned SaveExecReg = MI.getOperand(0).getReg();
     141         352 :   auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
     142             : 
     143         349 :   if (U == MRI->use_instr_nodbg_end() ||
     144         675 :       std::next(U) != MRI->use_instr_nodbg_end() ||
     145         323 :       U->getOpcode() != AMDGPU::SI_END_CF)
     146             :     return false;
     147             : 
     148             :   // Check for SI_KILL_*_TERMINATOR on path from if to endif.
     149             :   // if there is any such terminator simplififcations are not safe.
     150         260 :   auto SMBB = MI.getParent();
     151         260 :   auto EMBB = U->getParent();
     152             :   DenseSet<const MachineBasicBlock*> Visited;
     153             :   SmallVector<MachineBasicBlock*, 4> Worklist(SMBB->succ_begin(),
     154             :                                               SMBB->succ_end());
     155             : 
     156        1121 :   while (!Worklist.empty()) {
     157             :     MachineBasicBlock *MBB = Worklist.pop_back_val();
     158             : 
     159        1761 :     if (MBB == EMBB || !Visited.insert(MBB).second)
     160             :       continue;
     161         751 :     for(auto &Term : MBB->terminators())
     162         244 :       if (TII->isKillTerminator(Term.getOpcode()))
     163           5 :         return false;
     164             : 
     165         312 :     Worklist.append(MBB->succ_begin(), MBB->succ_end());
     166             :   }
     167             : 
     168             :   return true;
     169             : }
     170             : 
     171         352 : void SILowerControlFlow::emitIf(MachineInstr &MI) {
     172         352 :   MachineBasicBlock &MBB = *MI.getParent();
     173             :   const DebugLoc &DL = MI.getDebugLoc();
     174             :   MachineBasicBlock::iterator I(&MI);
     175             : 
     176         352 :   MachineOperand &SaveExec = MI.getOperand(0);
     177             :   MachineOperand &Cond = MI.getOperand(1);
     178             :   assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
     179             :          Cond.getSubReg() == AMDGPU::NoSubRegister);
     180             : 
     181         352 :   unsigned SaveExecReg = SaveExec.getReg();
     182             : 
     183             :   MachineOperand &ImpDefSCC = MI.getOperand(4);
     184             :   assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
     185             : 
     186             :   // If there is only one use of save exec register and that use is SI_END_CF,
     187             :   // we can optimize SI_IF by returning the full saved exec mask instead of
     188             :   // just cleared bits.
     189         352 :   bool SimpleIf = isSimpleIf(MI, MRI, TII);
     190             : 
     191             :   // Add an implicit def of exec to discourage scheduling VALU after this which
     192             :   // will interfere with trying to form s_and_saveexec_b64 later.
     193         449 :   unsigned CopyReg = SimpleIf ? SaveExecReg
     194         449 :                        : MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
     195             :   MachineInstr *CopyExec =
     196        1056 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
     197         352 :     .addReg(AMDGPU::EXEC)
     198         352 :     .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
     199             : 
     200         704 :   unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
     201             : 
     202             :   MachineInstr *And =
     203        1056 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
     204         352 :     .addReg(CopyReg)
     205             :     //.addReg(AMDGPU::EXEC)
     206         352 :     .addReg(Cond.getReg());
     207             :   setImpSCCDefDead(*And, true);
     208             : 
     209             :   MachineInstr *Xor = nullptr;
     210         352 :   if (!SimpleIf) {
     211          97 :     Xor =
     212         291 :       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
     213          97 :       .addReg(Tmp)
     214          97 :       .addReg(CopyReg);
     215             :     setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
     216             :   }
     217             : 
     218             :   // Use a copy that is a terminator to get correct spill code placement it with
     219             :   // fast regalloc.
     220             :   MachineInstr *SetExec =
     221        1056 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
     222         352 :     .addReg(Tmp, RegState::Kill);
     223             : 
     224             :   // Insert a pseudo terminator to help keep the verifier happy. This will also
     225             :   // be used later when inserting skips.
     226        1056 :   MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
     227         352 :                             .add(MI.getOperand(2));
     228             : 
     229         352 :   if (!LIS) {
     230         352 :     MI.eraseFromParent();
     231         352 :     return;
     232             :   }
     233             : 
     234           0 :   LIS->InsertMachineInstrInMaps(*CopyExec);
     235             : 
     236             :   // Replace with and so we don't need to fix the live interval for condition
     237             :   // register.
     238           0 :   LIS->ReplaceMachineInstrInMaps(MI, *And);
     239             : 
     240           0 :   if (!SimpleIf)
     241           0 :     LIS->InsertMachineInstrInMaps(*Xor);
     242           0 :   LIS->InsertMachineInstrInMaps(*SetExec);
     243           0 :   LIS->InsertMachineInstrInMaps(*NewBr);
     244             : 
     245           0 :   LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
     246           0 :   MI.eraseFromParent();
     247             : 
     248             :   // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
     249             :   // hard to add another def here but I'm not sure how to correctly update the
     250             :   // valno.
     251           0 :   LIS->removeInterval(SaveExecReg);
     252           0 :   LIS->createAndComputeVirtRegInterval(SaveExecReg);
     253           0 :   LIS->createAndComputeVirtRegInterval(Tmp);
     254           0 :   if (!SimpleIf)
     255           0 :     LIS->createAndComputeVirtRegInterval(CopyReg);
     256             : }
     257             : 
     258          51 : void SILowerControlFlow::emitElse(MachineInstr &MI) {
     259          51 :   MachineBasicBlock &MBB = *MI.getParent();
     260             :   const DebugLoc &DL = MI.getDebugLoc();
     261             : 
     262          51 :   unsigned DstReg = MI.getOperand(0).getReg();
     263             :   assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
     264             : 
     265          51 :   bool ExecModified = MI.getOperand(3).getImm() != 0;
     266          51 :   MachineBasicBlock::iterator Start = MBB.begin();
     267             : 
     268             :   // We are running before TwoAddressInstructions, and si_else's operands are
     269             :   // tied. In order to correctly tie the registers, split this into a copy of
     270             :   // the src like it does.
     271         102 :   unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
     272             :   MachineInstr *CopyExec =
     273         153 :     BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
     274          51 :       .add(MI.getOperand(1)); // Saved EXEC
     275             : 
     276             :   // This must be inserted before phis and any spill code inserted before the
     277             :   // else.
     278          58 :   unsigned SaveReg = ExecModified ?
     279          58 :     MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg;
     280             :   MachineInstr *OrSaveExec =
     281         153 :     BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg)
     282          51 :     .addReg(CopyReg);
     283             : 
     284          51 :   MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
     285             : 
     286             :   MachineBasicBlock::iterator ElsePt(MI);
     287             : 
     288          51 :   if (ExecModified) {
     289             :     MachineInstr *And =
     290          21 :       BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
     291           7 :       .addReg(AMDGPU::EXEC)
     292           7 :       .addReg(SaveReg);
     293             : 
     294           7 :     if (LIS)
     295           0 :       LIS->InsertMachineInstrInMaps(*And);
     296             :   }
     297             : 
     298             :   MachineInstr *Xor =
     299         153 :     BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
     300          51 :     .addReg(AMDGPU::EXEC)
     301          51 :     .addReg(DstReg);
     302             : 
     303             :   MachineInstr *Branch =
     304         153 :     BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
     305             :     .addMBB(DestBB);
     306             : 
     307          51 :   if (!LIS) {
     308          51 :     MI.eraseFromParent();
     309          51 :     return;
     310             :   }
     311             : 
     312           0 :   LIS->RemoveMachineInstrFromMaps(MI);
     313           0 :   MI.eraseFromParent();
     314             : 
     315           0 :   LIS->InsertMachineInstrInMaps(*CopyExec);
     316           0 :   LIS->InsertMachineInstrInMaps(*OrSaveExec);
     317             : 
     318           0 :   LIS->InsertMachineInstrInMaps(*Xor);
     319           0 :   LIS->InsertMachineInstrInMaps(*Branch);
     320             : 
     321             :   // src reg is tied to dst reg.
     322           0 :   LIS->removeInterval(DstReg);
     323           0 :   LIS->createAndComputeVirtRegInterval(DstReg);
     324           0 :   LIS->createAndComputeVirtRegInterval(CopyReg);
     325           0 :   if (ExecModified)
     326           0 :     LIS->createAndComputeVirtRegInterval(SaveReg);
     327             : 
     328             :   // Let this be recomputed.
     329           0 :   LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
     330             : }
     331             : 
     332          10 : void SILowerControlFlow::emitBreak(MachineInstr &MI) {
     333          10 :   MachineBasicBlock &MBB = *MI.getParent();
     334             :   const DebugLoc &DL = MI.getDebugLoc();
     335          10 :   unsigned Dst = MI.getOperand(0).getReg();
     336             : 
     337          30 :   MachineInstr *Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
     338          10 :                          .addReg(AMDGPU::EXEC)
     339          20 :                          .add(MI.getOperand(1));
     340             : 
     341          10 :   if (LIS)
     342           0 :     LIS->ReplaceMachineInstrInMaps(MI, *Or);
     343          10 :   MI.eraseFromParent();
     344          10 : }
     345             : 
     346          71 : void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
     347          71 :   MachineBasicBlock &MBB = *MI.getParent();
     348             :   const DebugLoc &DL = MI.getDebugLoc();
     349          71 :   auto Dst = MI.getOperand(0).getReg();
     350             : 
     351             :   // Skip ANDing with exec if the break condition is already masked by exec
     352             :   // because it is a V_CMP in the same basic block. (We know the break
     353             :   // condition operand was an i1 in IR, so if it is a VALU instruction it must
     354             :   // be one with a carry-out.)
     355             :   bool SkipAnding = false;
     356          71 :   if (MI.getOperand(1).isReg()) {
     357          71 :     if (MachineInstr *Def = MRI->getUniqueVRegDef(MI.getOperand(1).getReg())) {
     358          70 :       SkipAnding = Def->getParent() == MI.getParent()
     359         107 :           && SIInstrInfo::isVALU(*Def);
     360             :     }
     361             :   }
     362             : 
     363             :   // AND the break condition operand with exec, then OR that into the "loop
     364             :   // exit" mask.
     365             :   MachineInstr *And = nullptr, *Or = nullptr;
     366          70 :   if (!SkipAnding) {
     367         138 :     And = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
     368          46 :              .addReg(AMDGPU::EXEC)
     369          46 :              .add(MI.getOperand(1));
     370         138 :     Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
     371          46 :              .addReg(Dst)
     372          46 :              .add(MI.getOperand(2));
     373             :   } else
     374          25 :     Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
     375          25 :              .add(MI.getOperand(1))
     376          25 :              .add(MI.getOperand(2));
     377             : 
     378          71 :   if (LIS) {
     379           0 :     if (And)
     380           0 :       LIS->InsertMachineInstrInMaps(*And);
     381           0 :     LIS->ReplaceMachineInstrInMaps(MI, *Or);
     382             :   }
     383             : 
     384          71 :   MI.eraseFromParent();
     385          71 : }
     386             : 
     387             : void SILowerControlFlow::emitElseBreak(MachineInstr &MI) {
     388             :   // Lowered in the same way as emitIfBreak above.
     389          27 :   emitIfBreak(MI);
     390             : }
     391             : 
     392          61 : void SILowerControlFlow::emitLoop(MachineInstr &MI) {
     393          61 :   MachineBasicBlock &MBB = *MI.getParent();
     394             :   const DebugLoc &DL = MI.getDebugLoc();
     395             : 
     396             :   MachineInstr *AndN2 =
     397         183 :       BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
     398          61 :           .addReg(AMDGPU::EXEC)
     399         122 :           .add(MI.getOperand(0));
     400             : 
     401             :   MachineInstr *Branch =
     402          61 :       BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
     403          61 :           .add(MI.getOperand(1));
     404             : 
     405          61 :   if (LIS) {
     406           0 :     LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
     407           0 :     LIS->InsertMachineInstrInMaps(*Branch);
     408             :   }
     409             : 
     410          61 :   MI.eraseFromParent();
     411          61 : }
     412             : 
     413         410 : void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
     414         410 :   MachineBasicBlock &MBB = *MI.getParent();
     415             :   const DebugLoc &DL = MI.getDebugLoc();
     416             : 
     417         410 :   MachineBasicBlock::iterator InsPt = MBB.begin();
     418             :   MachineInstr *NewMI =
     419        1230 :       BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
     420         410 :           .addReg(AMDGPU::EXEC)
     421         820 :           .add(MI.getOperand(0));
     422             : 
     423         410 :   if (LIS)
     424           0 :     LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
     425             : 
     426         410 :   MI.eraseFromParent();
     427             : 
     428         410 :   if (LIS)
     429           0 :     LIS->handleMove(*NewMI);
     430         410 : }
     431             : 
     432             : // Returns replace operands for a logical operation, either single result
     433             : // for exec or two operands if source was another equivalent operation.
     434        3826 : void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
     435             :        SmallVectorImpl<MachineOperand> &Src) const {
     436        3826 :   MachineOperand &Op = MI.getOperand(OpNo);
     437        7640 :   if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
     438        1044 :     Src.push_back(Op);
     439             :     return;
     440             :   }
     441             : 
     442        2782 :   MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
     443        2782 :   if (!Def || Def->getParent() != MI.getParent() ||
     444        3056 :       !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
     445             :     return;
     446             : 
     447             :   // Make sure we do not modify exec between def and use.
     448             :   // A copy with implcitly defined exec inserted earlier is an exclusion, it
     449             :   // does not really modify exec.
     450         753 :   for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
     451        3634 :     if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
     452         363 :         !(I->isCopy() && I->getOperand(0).getReg() != AMDGPU::EXEC))
     453             :       return;
     454             : 
     455        2926 :   for (const auto &SrcOp : Def->explicit_operands())
     456        3034 :     if (SrcOp.isReg() && SrcOp.isUse() &&
     457        1034 :         (TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) ||
     458             :         SrcOp.getReg() == AMDGPU::EXEC))
     459         650 :       Src.push_back(SrcOp);
     460             : }
     461             : 
     462             : // Search and combine pairs of equivalent instructions, like
     463             : // S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
     464             : // S_OR_B64  x, (S_OR_B64  x, y) => S_OR_B64  x, y
     465             : // One of the operands is exec mask.
     466        1913 : void SILowerControlFlow::combineMasks(MachineInstr &MI) {
     467             :   assert(MI.getNumExplicitOperands() == 3);
     468             :   SmallVector<MachineOperand, 4> Ops;
     469             :   unsigned OpToReplace = 1;
     470        1913 :   findMaskOperands(MI, 1, Ops);
     471        1913 :   if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
     472        1913 :   findMaskOperands(MI, 2, Ops);
     473        1913 :   if (Ops.size() != 3) return;
     474             : 
     475             :   unsigned UniqueOpndIdx;
     476          35 :   if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
     477           5 :   else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
     478           5 :   else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
     479             :   else return;
     480             : 
     481          60 :   unsigned Reg = MI.getOperand(OpToReplace).getReg();
     482          30 :   MI.RemoveOperand(OpToReplace);
     483          60 :   MI.addOperand(Ops[UniqueOpndIdx]);
     484          60 :   if (MRI->use_empty(Reg))
     485          30 :     MRI->getUniqueVRegDef(Reg)->eraseFromParent();
     486             : }
     487             : 
     488       17876 : bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
     489       17876 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     490       17876 :   TII = ST.getInstrInfo();
     491       17876 :   TRI = &TII->getRegisterInfo();
     492             : 
     493             :   // This doesn't actually need LiveIntervals, but we can preserve them.
     494       17876 :   LIS = getAnalysisIfAvailable<LiveIntervals>();
     495       17876 :   MRI = &MF.getRegInfo();
     496             : 
     497             :   MachineFunction::iterator NextBB;
     498             :   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
     499       38094 :        BI != BE; BI = NextBB) {
     500             :     NextBB = std::next(BI);
     501             :     MachineBasicBlock &MBB = *BI;
     502             : 
     503             :     MachineBasicBlock::iterator I, Next, Last;
     504             : 
     505      410545 :     for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
     506             :       Next = std::next(I);
     507             :       MachineInstr &MI = *I;
     508             : 
     509     1170026 :       switch (MI.getOpcode()) {
     510         352 :       case AMDGPU::SI_IF:
     511         352 :         emitIf(MI);
     512         352 :         break;
     513             : 
     514          51 :       case AMDGPU::SI_ELSE:
     515          51 :         emitElse(MI);
     516          51 :         break;
     517             : 
     518          10 :       case AMDGPU::SI_BREAK:
     519          10 :         emitBreak(MI);
     520          10 :         break;
     521             : 
     522          44 :       case AMDGPU::SI_IF_BREAK:
     523          44 :         emitIfBreak(MI);
     524          44 :         break;
     525             : 
     526             :       case AMDGPU::SI_ELSE_BREAK:
     527             :         emitElseBreak(MI);
     528             :         break;
     529             : 
     530          61 :       case AMDGPU::SI_LOOP:
     531          61 :         emitLoop(MI);
     532          61 :         break;
     533             : 
     534         410 :       case AMDGPU::SI_END_CF:
     535         410 :         emitEndCf(MI);
     536         410 :         break;
     537             : 
     538        1913 :       case AMDGPU::S_AND_B64:
     539             :       case AMDGPU::S_OR_B64:
     540             :         // Cleanup bit manipulations on exec mask
     541        1913 :         combineMasks(MI);
     542             :         Last = I;
     543        1913 :         continue;
     544             : 
     545      387459 :       default:
     546             :         Last = I;
     547      387459 :         continue;
     548             :       }
     549             : 
     550             :       // Replay newly inserted code to combine masks
     551         955 :       Next = (Last == MBB.end()) ? MBB.begin() : Last;
     552             :     }
     553             :   }
     554             : 
     555       17876 :   return true;
     556             : }

Generated by: LCOV version 1.13