LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFixWWMLiveness.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 52 56 92.9 %
Date: 2018-06-17 00:07:59 Functions: 9 11 81.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Computations in WWM can overwrite values in inactive channels for
      12             : /// variables that the register allocator thinks are dead. This pass adds fake
      13             : /// uses of those variables to WWM instructions to make sure that they aren't
      14             : /// overwritten.
      15             : ///
      16             : /// As an example, consider this snippet:
      17             : /// %vgpr0 = V_MOV_B32_e32 0.0
      18             : /// if (...) {
      19             : ///   %vgpr1 = ...
      20             : ///   %vgpr2 = WWM killed %vgpr1
      21             : ///   ... = killed %vgpr2
      22             : ///   %vgpr0 = V_MOV_B32_e32 1.0
      23             : /// }
      24             : /// ... = %vgpr0
      25             : ///
      26             : /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
      27             : /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
      28             : /// writing %vgpr1 would only write to channels that would be clobbered by the
      29             : /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
      30             : /// it would clobber even the inactive channels for which the if-condition is
      31             : /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
      32             : /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
      33             : /// same register.
      34             : ///
      35             : /// In general, we need to figure out what registers might have their inactive
      36             : /// channels which are eventually used accidentally clobbered by a WWM
      37             : /// instruction. We approximate this using two conditions:
      38             : ///
      39             : /// 1. A definition of the variable reaches the WWM instruction.
      40             : /// 2. The variable would be live at the WWM instruction if all its defs were
      41             : /// partial defs (i.e. considered as a use), ignoring normal uses.
      42             : ///
      43             : /// If a register matches both conditions, then we add an implicit use of it to
      44             : /// the WWM instruction. Condition #2 is the heart of the matter: every
      45             : /// definition is really a partial definition, since every VALU instruction is
      46             : /// implicitly predicated.  We can usually ignore this, but WWM forces us not
      47             : /// to. Condition #1 prevents false positives if the variable is undefined at
      48             : /// the WWM instruction anyways. This is overly conservative in certain cases,
      49             : /// especially in uniform control flow, but this is a workaround anyways until
      50             : /// LLVM gains the notion of predicated uses and definitions of variables.
      51             : ///
      52             : //===----------------------------------------------------------------------===//
      53             : 
      54             : #include "AMDGPU.h"
      55             : #include "AMDGPUSubtarget.h"
      56             : #include "SIInstrInfo.h"
      57             : #include "SIRegisterInfo.h"
      58             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      59             : #include "llvm/ADT/DepthFirstIterator.h"
      60             : #include "llvm/ADT/SparseBitVector.h"
      61             : #include "llvm/CodeGen/LiveIntervals.h"
      62             : #include "llvm/CodeGen/MachineFunctionPass.h"
      63             : #include "llvm/CodeGen/Passes.h"
      64             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      65             : 
      66             : using namespace llvm;
      67             : 
      68             : #define DEBUG_TYPE "si-fix-wwm-liveness"
      69             : 
      70             : namespace {
      71             : 
      72        1780 : class SIFixWWMLiveness : public MachineFunctionPass {
      73             : private:
      74             :   LiveIntervals *LIS = nullptr;
      75             :   const SIRegisterInfo *TRI;
      76             :   MachineRegisterInfo *MRI;
      77             : 
      78             : public:
      79             :   static char ID;
      80             : 
      81        1788 :   SIFixWWMLiveness() : MachineFunctionPass(ID) {
      82        1788 :     initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
      83        1788 :   }
      84             : 
      85             :   bool runOnMachineFunction(MachineFunction &MF) override;
      86             : 
      87             :   bool runOnWWMInstruction(MachineInstr &MI);
      88             : 
      89             :   void addDefs(const MachineInstr &MI, SparseBitVector<> &set);
      90             : 
      91        1788 :   StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
      92             : 
      93        1788 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      94             :     // Should preserve the same set that TwoAddressInstructions does.
      95             :     AU.addPreserved<SlotIndexes>();
      96             :     AU.addPreserved<LiveIntervals>();
      97        1788 :     AU.addPreservedID(LiveVariablesID);
      98        1788 :     AU.addPreservedID(MachineLoopInfoID);
      99        1788 :     AU.addPreservedID(MachineDominatorsID);
     100        1788 :     AU.setPreservesCFG();
     101        1788 :     MachineFunctionPass::getAnalysisUsage(AU);
     102        1788 :   }
     103             : };
     104             : 
     105             : } // End anonymous namespace.
     106             : 
     107      360660 : INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
     108             :                 "SI fix WWM liveness", false, false)
     109             : 
     110             : char SIFixWWMLiveness::ID = 0;
     111             : 
     112             : char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
     113             : 
     114           0 : FunctionPass *llvm::createSIFixWWMLivenessPass() {
     115           0 :   return new SIFixWWMLiveness();
     116             : }
     117             : 
     118         400 : void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs)
     119             : {
     120        1108 :   for (const MachineOperand &Op : MI.defs()) {
     121         354 :     if (Op.isReg()) {
     122         354 :       unsigned Reg = Op.getReg();
     123         354 :       if (TRI->isVGPR(*MRI, Reg))
     124         182 :         Regs.set(Reg);
     125             :     }
     126             :   }
     127         400 : }
     128             : 
     129          19 : bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
     130          19 :   MachineBasicBlock *MBB = WWM.getParent();
     131             : 
     132             :   // Compute the registers that are live out of MI by figuring out which defs
     133             :   // are reachable from MI.
     134             :   SparseBitVector<> LiveOut;
     135             : 
     136             :   for (auto II = MachineBasicBlock::iterator(WWM), IE =
     137         113 :        MBB->end(); II != IE; ++II) {
     138          94 :     addDefs(*II, LiveOut);
     139             :   }
     140             : 
     141          38 :   for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
     142          19 :                                         E = df_end(MBB);
     143          32 :        I != E; ++I) {
     144          97 :     for (const MachineInstr &MI : **I) {
     145          58 :       addDefs(MI, LiveOut);
     146             :     }
     147             :   }
     148             : 
     149             :   // Compute the registers that reach MI.
     150             :   SparseBitVector<> Reachable;
     151             : 
     152          19 :   for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
     153         136 :        MBB->rend(); II != IE; ++II) {
     154         117 :     addDefs(*II, Reachable);
     155             :   }
     156             : 
     157          38 :   for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
     158          19 :                                          E = idf_end(MBB);
     159          28 :        I != E; ++I) {
     160         158 :     for (const MachineInstr &MI : **I) {
     161         131 :       addDefs(MI, Reachable);
     162             :     }
     163             :   }
     164             : 
     165             :   // find the intersection, and add implicit uses.
     166          19 :   LiveOut &= Reachable;
     167             : 
     168             :   bool Modified = false;
     169           9 :   for (unsigned Reg : LiveOut) {
     170           9 :     WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
     171           9 :     if (LIS) {
     172             :       // FIXME: is there a better way to update the live interval?
     173           0 :       LIS->removeInterval(Reg);
     174           0 :       LIS->createAndComputeVirtRegInterval(Reg);
     175             :     }
     176             :     Modified = true;
     177             :   }
     178             : 
     179          19 :   return Modified;
     180             : }
     181             : 
     182       17859 : bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
     183             :   bool Modified = false;
     184             : 
     185             :   // This doesn't actually need LiveIntervals, but we can preserve them.
     186       17859 :   LIS = getAnalysisIfAvailable<LiveIntervals>();
     187             : 
     188       17859 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     189             :   const SIInstrInfo *TII = ST.getInstrInfo();
     190             : 
     191       17859 :   TRI = &TII->getRegisterInfo();
     192       17859 :   MRI = &MF.getRegInfo();
     193             : 
     194       38033 :   for (MachineBasicBlock &MBB : MF) {
     195      421251 :     for (MachineInstr &MI : MBB) {
     196      761806 :       if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
     197          19 :         Modified |= runOnWWMInstruction(MI);
     198             :       }
     199             :     }
     200             :   }
     201             : 
     202       17859 :   return Modified;
     203             : }

Generated by: LCOV version 1.13