LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFixWWMLiveness.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 61 65 93.8 %
Date: 2017-09-14 15:23:50 Functions: 9 11 81.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief Computations in WWM can overwrite values in inactive channels for
      12             : /// variables that the register allocator thinks are dead. This pass adds fake
      13             : /// uses of those variables to WWM instructions to make sure that they aren't
      14             : /// overwritten.
      15             : ///
      16             : /// As an example, consider this snippet:
      17             : /// %vgpr0 = V_MOV_B32_e32 0.0
      18             : /// if (...) {
      19             : ///   %vgpr1 = ...
      20             : ///   %vgpr2 = WWM %vgpr1<kill>
      21             : ///   ... = %vgpr2<kill>
      22             : ///   %vgpr0 = V_MOV_B32_e32 1.0
      23             : /// }
      24             : /// ... = %vgpr0
      25             : ///
      26             : /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
      27             : /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
      28             : /// writing %vgpr1 would only write to channels that would be clobbered by the
      29             : /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
      30             : /// it would clobber even the inactive channels for which the if-condition is
      31             : /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
      32             : /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
      33             : /// same register.
      34             : ///
       35             : /// In general, we need to figure out which registers have inactive channels
       36             : /// that are eventually used but might be accidentally clobbered by a WWM
       37             : /// instruction. We approximate this using two conditions:
      38             : ///
      39             : /// 1. A definition of the variable reaches the WWM instruction.
      40             : /// 2. The variable would be live at the WWM instruction if all its defs were
      41             : /// partial defs (i.e. considered as a use), ignoring normal uses.
      42             : ///
      43             : /// If a register matches both conditions, then we add an implicit use of it to
      44             : /// the WWM instruction. Condition #2 is the heart of the matter: every
      45             : /// definition is really a partial definition, since every VALU instruction is
      46             : /// implicitly predicated.  We can usually ignore this, but WWM forces us not
      47             : /// to. Condition #1 prevents false positives if the variable is undefined at
      48             : /// the WWM instruction anyways. This is overly conservative in certain cases,
      49             : /// especially in uniform control flow, but this is a workaround anyways until
      50             : /// LLVM gains the notion of predicated uses and definitions of variables.
      51             : ///
      52             : //===----------------------------------------------------------------------===//
      53             : 
      54             : #include "AMDGPU.h"
      55             : #include "AMDGPUSubtarget.h"
      56             : #include "SIInstrInfo.h"
      57             : #include "SIRegisterInfo.h"
      58             : #include "llvm/ADT/DepthFirstIterator.h"
      59             : #include "llvm/ADT/SparseBitVector.h"
      60             : #include "llvm/CodeGen/LiveIntervalAnalysis.h"
      61             : #include "llvm/CodeGen/MachineFunctionPass.h"
      62             : #include "llvm/CodeGen/Passes.h"
      63             : #include "llvm/Target/TargetRegisterInfo.h"
      64             : 
      65             : using namespace llvm;
      66             : 
      67             : #define DEBUG_TYPE "si-fix-wwm-liveness"
      68             : 
      69             : namespace {
      70             : 
      71        1455 : class SIFixWWMLiveness : public MachineFunctionPass {
      72             : private:
      73             :   LiveIntervals *LIS = nullptr;
      74             :   const SIRegisterInfo *TRI;
      75             :   MachineRegisterInfo *MRI;
      76             : 
      77             : public:
      78             :   static char ID;
      79             : 
      80        1463 :   SIFixWWMLiveness() : MachineFunctionPass(ID) {
      81        1463 :     initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
      82        1463 :   }
      83             : 
      84             :   bool runOnMachineFunction(MachineFunction &MF) override;
      85             : 
      86             :   bool runOnWWMInstruction(MachineInstr &MI);
      87             : 
      88             :   void addDefs(const MachineInstr &MI, SparseBitVector<> &set);
      89             : 
      90        1463 :   StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
      91             : 
      92        1463 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      93             :     // Should preserve the same set that TwoAddressInstructions does.
      94        1463 :     AU.addPreserved<SlotIndexes>();
      95        1463 :     AU.addPreserved<LiveIntervals>();
      96        2926 :     AU.addPreservedID(LiveVariablesID);
      97        2926 :     AU.addPreservedID(MachineLoopInfoID);
      98        2926 :     AU.addPreservedID(MachineDominatorsID);
      99        1463 :     AU.setPreservesCFG();
     100        1463 :     MachineFunctionPass::getAnalysisUsage(AU);
     101        1463 :   }
     102             : };
     103             : 
     104             : } // End anonymous namespace.
     105             : 
     106      316927 : INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
     107             :                 "SI fix WWM liveness", false, false)
     108             : 
     109             : char SIFixWWMLiveness::ID = 0;
     110             : 
     111             : char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
     112             : 
     113           0 : FunctionPass *llvm::createSIFixWWMLivenessPass() {
     114           0 :   return new SIFixWWMLiveness();
     115             : }
     116             : 
     117         318 : void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs)
     118             : {
     119         596 :   for (const MachineOperand &Op : MI.defs()) {
     120         278 :     if (Op.isReg()) {
     121         278 :       unsigned Reg = Op.getReg();
     122         278 :       if (TRI->isVGPR(*MRI, Reg))
     123         154 :         Regs.set(Reg);
     124             :     }
     125             :   }
     126         318 : }
     127             : 
     128          17 : bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
     129          17 :   MachineBasicBlock *MBB = WWM.getParent();
     130             : 
      131             :   // Compute the registers that are live out of WWM by figuring out which
      132             :   // defs are reachable from WWM.
     133          34 :   SparseBitVector<> LiveOut;
     134             : 
     135          17 :   for (auto II = MachineBasicBlock::iterator(WWM), IE =
     136         137 :        MBB->end(); II != IE; ++II) {
     137          86 :     addDefs(*II, LiveOut);
     138             :   }
     139             : 
     140          85 :   for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
     141          34 :                                         E = df_end(MBB);
     142          28 :        I != E; ++I) {
     143         140 :     for (const MachineInstr &MI : **I) {
     144          48 :       addDefs(MI, LiveOut);
     145             :     }
     146             :   }
     147             : 
      148             :   // Compute the registers that reach WWM.
     149          34 :   SparseBitVector<> Reachable;
     150             : 
     151          34 :   for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
     152         152 :        MBB->rend(); II != IE; ++II) {
     153         101 :     addDefs(*II, Reachable);
     154             :   }
     155             : 
     156          85 :   for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
     157          34 :                                          E = idf_end(MBB);
     158          24 :        I != E; ++I) {
     159         194 :     for (const MachineInstr &MI : **I) {
     160          83 :       addDefs(MI, Reachable);
     161             :     }
     162             :   }
     163             : 
      164             :   // Find the intersection and add implicit uses.
     165          17 :   LiveOut &= Reachable;
     166             : 
     167          17 :   bool Modified = false;
     168          41 :   for (unsigned Reg : LiveOut) {
     169           7 :     WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
     170           7 :     if (LIS) {
     171             :       // FIXME: is there a better way to update the live interval?
     172           0 :       LIS->removeInterval(Reg);
     173           0 :       LIS->createAndComputeVirtRegInterval(Reg);
     174             :     }
     175           7 :     Modified = true;
     176             :   }
     177             : 
     178          34 :   return Modified;
     179             : }
     180             : 
     181       14834 : bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
     182       14834 :   bool Modified = false;
     183             : 
     184             :   // This doesn't actually need LiveIntervals, but we can preserve them.
     185       14834 :   LIS = getAnalysisIfAvailable<LiveIntervals>();
     186             : 
     187       14834 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     188       14834 :   const SIInstrInfo *TII = ST.getInstrInfo();
     189             : 
     190       14834 :   TRI = &TII->getRegisterInfo();
     191       14834 :   MRI = &MF.getRegInfo();
     192             : 
     193       61430 :   for (MachineBasicBlock &MBB : MF) {
     194      749828 :     for (MachineInstr &MI : MBB) {
     195      341058 :       if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
     196          17 :         Modified |= runOnWWMInstruction(MI);
     197             :       }
     198             :     }
     199             :   }
     200             : 
     201       14834 :   return Modified;
     202             : }

Generated by: LCOV version 1.13