LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFixWWMLiveness.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 111 113 98.2 %
Date: 2018-10-20 13:21:21 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Computations in WWM can overwrite values in inactive channels for
      12             : /// variables that the register allocator thinks are dead. This pass adds fake
      13             : /// uses of those variables to their def(s) to make sure that they aren't
      14             : /// overwritten.
      15             : ///
      16             : /// As an example, consider this snippet:
      17             : /// %vgpr0 = V_MOV_B32_e32 0.0
      18             : /// if (...) {
      19             : ///   %vgpr1 = ...
      20             : ///   %vgpr2 = WWM killed %vgpr1
      21             : ///   ... = killed %vgpr2
      22             : ///   %vgpr0 = V_MOV_B32_e32 1.0
      23             : /// }
      24             : /// ... = %vgpr0
      25             : ///
      26             : /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
      27             : /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
      28             : /// writing %vgpr1 would only write to channels that would be clobbered by the
      29             : /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
      30             : /// it would clobber even the inactive channels for which the if-condition is
      31             : /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
      32             : /// of %vgpr0 to its def to make sure they aren't allocated to the
      33             : /// same register.
      34             : ///
      35             : /// In general, we need to figure out what registers might have their inactive
      36             : /// channels which are eventually used accidentally clobbered by a WWM
      37             : /// instruction. We do that by spotting three separate cases of registers:
      38             : ///
      39             : /// 1. A "then phi": the value resulting from phi elimination of a phi node at
      40             : ///    the end of an if..endif. If there is WWM code in the "then", then we
      41             : ///    make the def at the end of the "then" branch a partial def by adding an
      42             : ///    implicit use of the register.
      43             : ///
      44             : /// 2. A "loop exit register": a value written inside a loop but used outside the
      45             : ///    loop, where there is WWM code inside the loop (the case in the example
      46             : ///    above). We add an implicit_def of the register in the loop pre-header,
      47             : ///    and make the original def a partial def by adding an implicit use of the
      48             : ///    register.
      49             : ///
      50             : /// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
      51             : ///    in a loop header. If there is WWM code inside the loop, then we make all
      52             : ///    defs inside the loop partial defs by adding an implicit use of the
      53             : ///    register on each one.
      54             : ///
      55             : /// Note that we do not need to consider an if..else..endif phi. We only need to
      56             : /// consider non-uniform control flow, and control flow structurization would
      57             : /// have transformed a non-uniform if..else..endif into two if..endifs.
      58             : ///
      59             : /// The analysis to detect these cases relies on a property of the MIR
      60             : /// arising from this pass running straight after PHIElimination and before any
      61             : /// coalescing: that any virtual register with more than one definition must be
      62             : /// the new register added to lower a phi node by PHIElimination.
      63             : ///
      64             : /// FIXME: We should detect whether a register in one of the above categories is
      65             : /// already live at the WWM code before deciding to add the implicit uses to
      66             : /// synthesize its liveness.
      67             : ///
      68             : /// FIXME: I believe this whole scheme may be flawed due to the possibility of
      69             : /// the register allocator doing live interval splitting.
      70             : ///
      71             : //===----------------------------------------------------------------------===//
      72             : 
      73             : #include "AMDGPU.h"
      74             : #include "AMDGPUSubtarget.h"
      75             : #include "SIInstrInfo.h"
      76             : #include "SIRegisterInfo.h"
      77             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      78             : #include "llvm/ADT/DepthFirstIterator.h"
      79             : #include "llvm/ADT/SparseBitVector.h"
      80             : #include "llvm/CodeGen/LiveIntervals.h"
      81             : #include "llvm/CodeGen/MachineDominators.h"
      82             : #include "llvm/CodeGen/MachineFunctionPass.h"
      83             : #include "llvm/CodeGen/MachineLoopInfo.h"
      84             : #include "llvm/CodeGen/Passes.h"
      85             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      86             : 
      87             : using namespace llvm;
      88             : 
      89             : #define DEBUG_TYPE "si-fix-wwm-liveness"
      90             : 
      91             : namespace {
      92             : 
      93             : class SIFixWWMLiveness : public MachineFunctionPass {
      94             : private:
      95             :   MachineDominatorTree *DomTree;
      96             :   MachineLoopInfo *LoopInfo;
      97             :   LiveIntervals *LIS = nullptr;
      98             :   const SIInstrInfo *TII;
      99             :   const SIRegisterInfo *TRI;
     100             :   MachineRegisterInfo *MRI;
     101             : 
     102             :   std::vector<MachineInstr *> WWMs;
     103             :   std::vector<MachineOperand *> ThenDefs;
     104             :   std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
     105             :   std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
     106             : 
     107             : public:
     108             :   static char ID;
     109             : 
     110        1954 :   SIFixWWMLiveness() : MachineFunctionPass(ID) {
     111        1954 :     initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
     112        1954 :   }
     113             : 
     114             :   bool runOnMachineFunction(MachineFunction &MF) override;
     115             : 
     116        1954 :   StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
     117             : 
     118        1954 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     119        1954 :     AU.addRequiredID(MachineDominatorsID);
     120        1954 :     AU.addRequiredID(MachineLoopInfoID);
     121             :     // Should preserve the same set that TwoAddressInstructions does.
     122             :     AU.addPreserved<SlotIndexes>();
     123             :     AU.addPreserved<LiveIntervals>();
     124        1954 :     AU.addPreservedID(LiveVariablesID);
     125             :     AU.addPreservedID(MachineLoopInfoID);
     126             :     AU.addPreservedID(MachineDominatorsID);
     127        1954 :     AU.setPreservesCFG();
     128        1954 :     MachineFunctionPass::getAnalysisUsage(AU);
     129        1954 :   }
     130             : 
     131             : private:
     132             :   void processDef(MachineOperand &DefOpnd);
     133             :   bool processThenDef(MachineOperand *DefOpnd);
     134             :   bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
     135             :   bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
     136             : };
     137             : 
     138             : } // End anonymous namespace.
     139             : 
     140       85105 : INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
     141             :                 "SI fix WWM liveness", false, false)
     142       85105 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     143       85105 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     144      200978 : INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
     145             :                 "SI fix WWM liveness", false, false)
     146             : 
     147             : char SIFixWWMLiveness::ID = 0;
     148             : 
     149             : char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
     150             : 
     151           0 : FunctionPass *llvm::createSIFixWWMLivenessPass() {
     152           0 :   return new SIFixWWMLiveness();
     153             : }
     154             : 
     155       19731 : bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
     156             :   LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
     157             :   bool Modified = false;
     158             : 
     159             :   // This doesn't actually need LiveIntervals, but we can preserve them.
     160       19731 :   LIS = getAnalysisIfAvailable<LiveIntervals>();
     161             : 
     162       19731 :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     163             : 
     164       19731 :   TII = ST.getInstrInfo();
     165       19731 :   TRI = &TII->getRegisterInfo();
     166       19731 :   MRI = &MF.getRegInfo();
     167             : 
     168       19731 :   DomTree = &getAnalysis<MachineDominatorTree>();
     169       19731 :   LoopInfo = &getAnalysis<MachineLoopInfo>();
     170             : 
     171             :   // Scan the function to find the WWM sections and the candidate registers for
     172             :   // having liveness modified.
     173       42093 :   for (MachineBasicBlock &MBB : MF) {
     174      451474 :     for (MachineInstr &MI : MBB) {
     175      858224 :       if (MI.getOpcode() == AMDGPU::EXIT_WWM)
     176          48 :         WWMs.push_back(&MI);
     177             :       else {
     178      811756 :         for (MachineOperand &DefOpnd : MI.defs()) {
     179      382692 :           if (DefOpnd.isReg()) {
     180      382692 :             unsigned Reg = DefOpnd.getReg();
     181      382692 :             if (TRI->isVGPR(*MRI, Reg))
     182      198573 :               processDef(DefOpnd);
     183             :           }
     184             :         }
     185             :       }
     186             :     }
     187             :   }
     188       19731 :   if (!WWMs.empty()) {
     189             :     // Synthesize liveness over WWM sections as required.
     190          69 :     for (auto ThenDef : ThenDefs)
     191          29 :       Modified |= processThenDef(ThenDef);
     192          43 :     for (auto LoopExitDef : LoopExitDefs)
     193           3 :       Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
     194          43 :     for (auto LoopPhiDef : LoopPhiDefs)
     195           3 :       Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
     196             :   }
     197             : 
     198             :   WWMs.clear();
     199             :   ThenDefs.clear();
     200             :   LoopExitDefs.clear();
     201             :   LoopPhiDefs.clear();
     202             : 
     203       19731 :   return Modified;
     204             : }
     205             : 
     206             : // During the function scan, process an operand that defines a VGPR.
     207             : // This categorizes the register and puts it in the appropriate list for later
     208             : // use when processing a WWM section.
     209      198573 : void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
     210      198573 :   unsigned Reg = DefOpnd.getReg();
     211             :   // Get all the defining instructions. For convenience, make Defs[0] the def
     212             :   // we are on now.
     213             :   SmallVector<const MachineInstr *, 4> Defs;
     214      198573 :   Defs.push_back(DefOpnd.getParent());
     215      601571 :   for (auto &MI : MRI->def_instructions(Reg)) {
     216      204425 :     if (&MI != DefOpnd.getParent())
     217        5852 :       Defs.push_back(&MI);
     218             :   }
     219             :   // Check whether this def dominates all the others. If not, ignore this def.
     220             :   // Either it is going to be processed when the scan encounters its other def
     221             :   // that dominates all defs, or there is no def that dominates all others.
     222             :   // The latter case is an eliminated phi from an if..else..endif or similar,
     223             :   // which must be for uniform control flow so can be ignored.
     224             :   // Because this pass runs shortly after PHIElimination, we assume that any
     225             :   // multi-def register is a lowered phi, and thus has each def in a separate
     226             :   // basic block.
     227      400026 :   for (unsigned I = 1; I != Defs.size(); ++I) {
     228       17469 :     if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
     229             :       return;
     230             :   }
     231             :   // Check for the case of an if..endif lowered phi: It has two defs, one
     232             :   // dominates the other, and there is a single use in a successor of the
     233             :   // dominant def.
     234             :   // Later we will spot any WWM code inside
     235             :   // the "then" clause and turn the second def into a partial def so its
     236             :   // liveness goes through the WWM code in the "then" clause.
     237      195630 :   if (Defs.size() == 2) {
     238        2834 :     auto DomDefBlock = Defs[0]->getParent();
     239        2834 :     if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
     240        1581 :       auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
     241        3144 :       for (auto Succ : DomDefBlock->successors()) {
     242        3144 :         if (Succ == UseBlock) {
     243             :           LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
     244        1581 :           ThenDefs.push_back(&DefOpnd);
     245             :           return;
     246             :         }
     247             :       }
     248             :     }
     249             :   }
     250             :   // Check for the case of a non-lowered-phi register (single def) that exits
     251             :   // a loop, that is, it has a use that is outside a loop that the def is
     252             :   // inside. We find the outermost loop that the def is inside but a use is
     253             :   // outside. Later we will spot any WWM code inside that loop and then make
     254             :   // the def a partial def so its liveness goes round the loop and through the
     255             :   // WWM code.
     256      194049 :   if (Defs.size() == 1) {
     257      385550 :     auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
     258        4317 :     if (!Loop)
     259      188458 :       return;
     260             :     bool IsLoopExit = false;
     261       10074 :     for (auto &Use : MRI->use_instructions(Reg)) {
     262        5757 :       auto UseBlock = Use.getParent();
     263        5757 :       if (Loop->contains(UseBlock))
     264             :         continue;
     265             :       IsLoopExit = true;
     266        1166 :       while (auto Parent = Loop->getParentLoop()) {
     267          31 :         if (Parent->contains(UseBlock))
     268             :           break;
     269             :         Loop = Parent;
     270             :       }
     271             :     }
     272        4317 :     if (!IsLoopExit)
     273             :       return;
     274             :     LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
     275             :         << " is a loop exit reg with loop header at "
     276             :         << "bb." << Loop->getHeader()->getNumber() << "\n");
     277        1128 :     LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
     278             :             &DefOpnd, Loop));
     279        1128 :     return;
     280             :   }
     281             :   // Check for the case of a lowered single-preheader-loop phi, that is, a
     282             :   // multi-def register where the dominating def is in the loop pre-header and
     283             :   // all other defs are in backedges. Later we will spot any WWM code inside
     284             :   // that loop and then make the backedge defs partial defs so the liveness
     285             :   // goes through the WWM code.
     286             :   // Note that we are ignoring multi-preheader loops on the basis that the
     287             :   // structurizer does not allow that for non-uniform loops.
     288             :   // There must be a single use in the loop header.
     289        1274 :   if (!MRI->hasOneUse(Reg))
     290             :     return;
     291        1216 :   auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
     292        1216 :   auto Loop = LoopInfo->getLoopFor(UseBlock);
     293        1213 :   if (!Loop || Loop->getHeader() != UseBlock
     294        2426 :       || Loop->contains(Defs[0]->getParent())) {
     295             :     LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
     296             :         << " is multi-def but single use not in loop header\n");
     297           3 :     return;
     298             :   }
     299        2429 :   for (unsigned I = 1; I != Defs.size(); ++I) {
     300        1216 :     if (!Loop->contains(Defs[I]->getParent()))
     301             :       return;
     302             :   }
     303             :   LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
     304             :       << " is a loop phi reg with loop header at "
     305             :       << "bb." << Loop->getHeader()->getNumber() << "\n");
     306        1213 :   LoopPhiDefs.push_back(
     307        1213 :       std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
     308             : }
     309             : 
     310             : // Process a then phi def: It has two defs, one dominates the other, and there
     311             : // is a single use in a successor of the dominant def. Here we spot any WWM
     312             : // code inside the "then" clause and turn the second def into a partial def so
     313             : // its liveness goes through the WWM code in the "then" clause.
     314          29 : bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
     315             :   LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
     316          58 :   if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
     317             :     // Ignore if dominating def is undef.
     318             :     LLVM_DEBUG(dbgs() << "  ignoring as dominating def is undef\n");
     319             :     return false;
     320             :   }
     321           9 :   unsigned Reg = DefOpnd->getReg();
     322             :   // Get the use block, which is the endif block.
     323           9 :   auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
     324             :   // Check whether there is WWM code inside the then branch. The WWM code must
     325             :   // be dominated by the if but not dominated by the endif.
     326             :   bool ContainsWWM = false;
     327           9 :   for (auto WWM : WWMs) {
     328           9 :     if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
     329          18 :         && !DomTree->dominates(UseBlock, WWM->getParent())) {
     330             :       LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
     331             :       ContainsWWM = true;
     332             :       break;
     333             :     }
     334             :   }
     335           9 :   if (!ContainsWWM)
     336             :     return false;
     337             :   // Get the other def.
     338             :   MachineInstr *OtherDef = nullptr;
     339          36 :   for (auto &MI : MRI->def_instructions(Reg)) {
     340          18 :     if (&MI != DefOpnd->getParent())
     341             :       OtherDef = &MI;
     342             :   }
     343             :   // Make it a partial def.
     344           9 :   OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
     345             :   LLVM_DEBUG(dbgs() << *OtherDef);
     346           9 :   return true;
     347             : }
     348             : 
     349             : // Process a loop exit def, that is, a register with a single use in a loop
     350             : // that has a use outside the loop.  Here we spot any WWM code inside that loop
     351             : // and then make the def a partial def so its liveness goes round the loop and
     352             : // through the WWM code.
     353           3 : bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
     354             :       MachineLoop *Loop) {
     355             :   LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
     356             :   // Check whether there is WWM code inside the loop.
     357             :   bool ContainsWWM = false;
     358           6 :   for (auto WWM : WWMs) {
     359           6 :     if (Loop->contains(WWM->getParent())) {
     360             :       LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
     361             :       ContainsWWM = true;
     362             :       break;
     363             :     }
     364             :   }
     365           3 :   if (!ContainsWWM)
     366             :     return false;
     367           3 :   unsigned Reg = DefOpnd->getReg();
     368             :   // Add a new implicit_def in loop preheader(s).
     369           9 :   for (auto Pred : Loop->getHeader()->predecessors()) {
     370           6 :     if (!Loop->contains(Pred)) {
     371           3 :       auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
     372           6 :           TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
     373             :       LLVM_DEBUG(dbgs() << *ImplicitDef);
     374             :       (void)ImplicitDef;
     375             :     }
     376             :   }
     377             :   // Make the original def partial.
     378           6 :   DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
     379             :           Reg, false, /*isImp=*/true));
     380             :   LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
     381           3 :   return true;
     382             : }
     383             : 
     384             : // Process a loop phi def, that is, a multi-def register where the dominating
     385             : // def is in the loop pre-header and all other defs are in backedges. Here we
     386             : // spot any WWM code inside that loop and then make the backedge defs partial
     387             : // defs so the liveness goes through the WWM code.
     388           3 : bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
     389             :       MachineLoop *Loop) {
     390             :   LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
     391             :   // Check whether there is WWM code inside the loop.
     392             :   bool ContainsWWM = false;
     393           6 :   for (auto WWM : WWMs) {
     394           6 :     if (Loop->contains(WWM->getParent())) {
     395             :       LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
     396             :       ContainsWWM = true;
     397             :       break;
     398             :     }
     399             :   }
     400           3 :   if (!ContainsWWM)
     401             :     return false;
     402           3 :   unsigned Reg = DefOpnd->getReg();
     403             :   // Remove kill mark from uses.
     404           9 :   for (auto &Use : MRI->use_operands(Reg))
     405             :     Use.setIsKill(false);
     406             :   // Make all defs except the dominating one partial defs.
     407             :   SmallVector<MachineInstr *, 4> Defs;
     408          12 :   for (auto &Def : MRI->def_instructions(Reg))
     409           6 :     Defs.push_back(&Def);
     410           9 :   for (auto Def : Defs) {
     411           6 :     if (DefOpnd->getParent() == Def)
     412             :       continue;
     413           3 :     Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
     414             :     LLVM_DEBUG(dbgs() << *Def);
     415             :   }
     416             :   return true;
     417             : }
     418             : 

Generated by: LCOV version 1.13