LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFixSGPRCopies.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2017-09-14 15:23:50
Coverage: Lines: 245 of 267 (91.8 %) | Functions: 25 of 27 (92.6 %)

          Line data    Source code
       1             : //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Copies from VGPR to SGPR registers are illegal and the register coalescer
      12             : /// will sometimes generate these illegal copies in situations like this:
      13             : ///
      14             : ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
      15             : ///
      16             : /// BB0:
      17             : ///   %vreg0 <sgpr> = SCALAR_INST
      18             : ///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
      19             : ///    ...
      20             : ///    BRANCH %cond BB1, BB2
      21             : ///  BB1:
      22             : ///    %vreg2 <vgpr> = VECTOR_INST
      23             : ///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
      24             : ///  BB2:
      25             : ///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
      26             : ///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
      27             : ///
      28             : ///
      29             : /// The coalescer will begin at BB0 and eliminate its copy; the resulting
      30             : /// code will then look like this:
      31             : ///
      32             : /// BB0:
      33             : ///   %vreg0 <sgpr> = SCALAR_INST
      34             : ///    ...
      35             : ///    BRANCH %cond BB1, BB2
      36             : /// BB1:
      37             : ///   %vreg2 <vgpr> = VECTOR_INST
      38             : ///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
      39             : /// BB2:
      40             : ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
      41             : ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
      42             : ///
      43             : /// Now that the result of the PHI instruction is an SGPR, the register
      44             : /// allocator is forced to constrain the register class of %vreg3 to
      45             : /// <sgpr>, so we end up with final code like this:
      46             : ///
      47             : /// BB0:
      48             : ///   %vreg0 <sgpr> = SCALAR_INST
      49             : ///    ...
      50             : ///    BRANCH %cond BB1, BB2
      51             : /// BB1:
      52             : ///   %vreg2 <vgpr> = VECTOR_INST
      53             : ///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
      54             : /// BB2:
      55             : ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
      56             : ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
      57             : ///
      58             : /// Now this code contains an illegal copy from a VGPR to an SGPR.
      59             : ///
      60             : /// In order to avoid this problem, this pass searches for PHI instructions
      61             : /// which define a <vsrc> register and constrains their definition class to
      62             : /// <vgpr> if a user of the PHI's definition register is a vector instruction.
      63             : /// If the PHI's definition class is constrained to <vgpr>, then the coalescer
      64             : /// will be unable to perform the COPY removal from the above example, which
      65             : /// ultimately led to the creation of an illegal COPY.
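                      : ///
                      : /// For illustration, once the PHI definition is constrained to <vgpr>, the
                      : /// coalescer must keep the BB0 copy and the final code remains legal along
                      : /// these lines (an illustrative sketch in the notation above):
                      : ///
                      : ///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
                      : ///   ...
                      : ///   %vreg4 <vgpr> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
                      : ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vgpr>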
      66             : //===----------------------------------------------------------------------===//
      67             : 
      68             : #include "AMDGPU.h"
      69             : #include "AMDGPUSubtarget.h"
      70             : #include "SIInstrInfo.h"
      71             : #include "SIRegisterInfo.h"
      72             : #include "llvm/ADT/DenseSet.h"
      73             : #include "llvm/ADT/STLExtras.h"
      74             : #include "llvm/ADT/SmallSet.h"
      75             : #include "llvm/ADT/SmallVector.h"
      76             : #include "llvm/CodeGen/MachineBasicBlock.h"
      77             : #include "llvm/CodeGen/MachineDominators.h"
      78             : #include "llvm/CodeGen/MachineFunction.h"
      79             : #include "llvm/CodeGen/MachineFunctionPass.h"
      80             : #include "llvm/CodeGen/MachineInstr.h"
      81             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      82             : #include "llvm/CodeGen/MachineOperand.h"
      83             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      84             : #include "llvm/Pass.h"
      85             : #include "llvm/Support/CodeGen.h"
      86             : #include "llvm/Support/CommandLine.h"
      87             : #include "llvm/Support/Debug.h"
      88             : #include "llvm/Support/raw_ostream.h"
      89             : #include "llvm/Target/TargetMachine.h"
      90             : #include "llvm/Target/TargetRegisterInfo.h"
      91             : #include <cassert>
      92             : #include <cstdint>
      93             : #include <iterator>
      94             : #include <list>
      95             : #include <map>
      96             : #include <tuple>
      97             : #include <utility>
      98             : 
      99             : using namespace llvm;
     100             : 
     101             : #define DEBUG_TYPE "si-fix-sgpr-copies"
     102             : 
     103       72306 : static cl::opt<bool> EnableM0Merge(
     104             :   "amdgpu-enable-merge-m0",
     105      216918 :   cl::desc("Merge and hoist M0 initializations"),
     106      289224 :   cl::init(false));
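                      : // Being a cl::opt, the merge can be exercised from the llc command line with
                      : // something like (everything besides the option name is illustrative):
                      : //   llc -march=amdgcn -amdgpu-enable-merge-m0 input.ll -o output.s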
     107             : 
     108             : namespace {
     109             : 
     110        1458 : class SIFixSGPRCopies : public MachineFunctionPass {
     111             :   MachineDominatorTree *MDT;
     112             : 
     113             : public:
     114             :   static char ID;
     115             : 
     116        1466 :   SIFixSGPRCopies() : MachineFunctionPass(ID) {}
     117             : 
     118             :   bool runOnMachineFunction(MachineFunction &MF) override;
     119             : 
     120           3 :   StringRef getPassName() const override { return "SI Fix SGPR copies"; }
     121             : 
     122        1460 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     123        1460 :     AU.addRequired<MachineDominatorTree>();
     124        1460 :     AU.addPreserved<MachineDominatorTree>();
     125        1460 :     AU.setPreservesCFG();
     126        1460 :     MachineFunctionPass::getAnalysisUsage(AU);
     127        1460 :   }
     128             : };
     129             : 
     130             : } // end anonymous namespace
     131             : 
     132       53042 : INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
     133             :                      "SI Fix SGPR copies", false, false)
     134       53042 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     135      312538 : INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
     136             :                      "SI Fix SGPR copies", false, false)
     137             : 
     138             : char SIFixSGPRCopies::ID = 0;
     139             : 
     140             : char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
     141             : 
     142           0 : FunctionPass *llvm::createSIFixSGPRCopiesPass() {
     143           0 :   return new SIFixSGPRCopies();
     144             : }
     145             : 
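                      : // Returns true if any virtual-register operand of MI belongs to a register
                      : // class that contains VGPRs.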
     146       26007 : static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
     147       26007 :   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
     148      200070 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     149      633524 :     if (!MI.getOperand(i).isReg() ||
     150      205720 :         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
     151       74028 :       continue;
     152             : 
     153      205720 :     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
     154             :       return true;
     155             :   }
     156             :   return false;
     157             : }
     158             : 
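                      : // Returns the register classes of a COPY's {source, destination} operands,
                      : // handling both virtual and physical registers.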
     159             : static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
     160      199642 : getCopyRegClasses(const MachineInstr &Copy,
     161             :                   const SIRegisterInfo &TRI,
     162             :                   const MachineRegisterInfo &MRI) {
     163      199642 :   unsigned DstReg = Copy.getOperand(0).getReg();
     164      199642 :   unsigned SrcReg = Copy.getOperand(1).getReg();
     165             : 
     166             :   const TargetRegisterClass *SrcRC =
     167      377475 :     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
     168             :     MRI.getRegClass(SrcReg) :
     169      199642 :     TRI.getPhysRegClass(SrcReg);
     170             : 
     171             :   // We don't really care about the subregister here.
     172             :   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
     173             : 
     174             :   const TargetRegisterClass *DstRC =
     175      399284 :     TargetRegisterInfo::isVirtualRegister(DstReg) ?
     176             :     MRI.getRegClass(DstReg) :
     177      199642 :     TRI.getPhysRegClass(DstReg);
     178             : 
     179      399284 :   return std::make_pair(SrcRC, DstRC);
     180             : }
     181             : 
     182      194830 : static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
     183             :                              const TargetRegisterClass *DstRC,
     184             :                              const SIRegisterInfo &TRI) {
     185      194830 :   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
     186             : }
     187             : 
     188      171072 : static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
     189             :                              const TargetRegisterClass *DstRC,
     190             :                              const SIRegisterInfo &TRI) {
     191      171072 :   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
     192             : }
     193             : 
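                      : // Attempts to turn an SGPR->VGPR copy into an SGPR->SGPR copy by switching
                      : // the destination to an equivalent SGPR class. This is only done when every
                      : // other user of the destination is a non-generic instruction in the same
                      : // block that can accept the SGPR source operand directly.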
     194       32409 : static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
     195             :                                       const SIRegisterInfo *TRI,
     196             :                                       const SIInstrInfo *TII) {
     197       32409 :   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
     198       64818 :   auto &Src = MI.getOperand(1);
     199       32409 :   unsigned DstReg = MI.getOperand(0).getReg();
     200       32409 :   unsigned SrcReg = Src.getReg();
     201       64818 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
     202       32409 :       !TargetRegisterInfo::isVirtualRegister(DstReg))
     203             :     return false;
     204             : 
     205       98950 :   for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
     206       64800 :     const auto *UseMI = MO.getParent();
     207       64800 :     if (UseMI == &MI)
     208       32409 :       continue;
     209       96769 :     if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
     210       60813 :         UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
     211       28422 :         !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
     212             :       return false;
     213             :   }
     214             :   // Change VGPR to SGPR destination.
     215        1741 :   MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
     216        1741 :   return true;
     217             : }
     218             : 
     219             : // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
     220             : //
     221             : // SGPRx = ...
     222             : // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
     223             : // VGPRz = COPY SGPRy
     224             : //
     225             : // ==>
     226             : //
     227             : // VGPRx = COPY SGPRx
     228             : // VGPRz = REG_SEQUENCE VGPRx, sub0
     229             : //
     230             : // This exposes immediate folding opportunities when materializing 64-bit
     231             : // immediates.
     232       43891 : static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
     233             :                                         const SIRegisterInfo *TRI,
     234             :                                         const SIInstrInfo *TII,
     235             :                                         MachineRegisterInfo &MRI) {
     236             :   assert(MI.isRegSequence());
     237             : 
     238       43891 :   unsigned DstReg = MI.getOperand(0).getReg();
     239       87782 :   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
     240             :     return false;
     241             : 
     242       23182 :   if (!MRI.hasOneUse(DstReg))
     243             :     return false;
     244             : 
     245       35866 :   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
     246       17933 :   if (!CopyUse.isCopy())
     247             :     return false;
     248             : 
     249             :   // It is illegal to have vreg inputs to a physreg defining reg_sequence.
     250        9656 :   if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
     251             :     return false;
     252             : 
     253             :   const TargetRegisterClass *SrcRC, *DstRC;
     254       14436 :   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
     255             : 
     256        4812 :   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
     257             :     return false;
     258             : 
     259        4412 :   if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
     260             :     return true;
     261             : 
     262             :   // TODO: Could have multiple extracts?
     263        8730 :   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
     264        4365 :   if (SubReg != AMDGPU::NoSubRegister)
     265             :     return false;
     266             : 
     267        4365 :   MRI.setRegClass(DstReg, DstRC);
     268             : 
     269             :   // SGPRx = ...
     270             :   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
     271             :   // VGPRz = COPY SGPRy
     272             : 
     273             :   // =>
     274             :   // VGPRx = COPY SGPRx
     275             :   // VGPRz = REG_SEQUENCE VGPRx, sub0
     276             : 
     277        4365 :   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
     278             : 
     279       16717 :   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
     280       24704 :     unsigned SrcReg = MI.getOperand(I).getReg();
     281       24704 :     unsigned SrcSubReg = MI.getOperand(I).getSubReg();
     282             : 
     283       12352 :     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
     284             :     assert(TRI->isSGPRClass(SrcRC) &&
     285             :            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
     286             : 
     287       12352 :     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
     288       12352 :     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
     289             : 
     290       12352 :     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
     291             : 
     292       24704 :     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
     293       37056 :             TmpReg)
     294       37056 :         .add(MI.getOperand(I));
     295             : 
     296       24704 :     MI.getOperand(I).setReg(TmpReg);
     297             :   }
     298             : 
     299        4365 :   CopyUse.eraseFromParent();
     300        4365 :   return true;
     301             : }
     302             : 
     303         332 : static bool phiHasVGPROperands(const MachineInstr &PHI,
     304             :                                const MachineRegisterInfo &MRI,
     305             :                                const SIRegisterInfo *TRI,
     306             :                                const SIInstrInfo *TII) {
     307        1013 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     308        1370 :     unsigned Reg = PHI.getOperand(i).getReg();
     309         685 :     if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
     310             :       return true;
     311             :   }
     312             :   return false;
     313             : }
     314             : 
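                      : // Returns true if any incoming value of PHI is defined, possibly through a
                      : // chain of other PHIs, by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK.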
     315         245 : static bool phiHasBreakDef(const MachineInstr &PHI,
     316             :                            const MachineRegisterInfo &MRI,
     317             :                            SmallSet<unsigned, 8> &Visited) {
     318         602 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     319         924 :     unsigned Reg = PHI.getOperand(i).getReg();
     320         462 :     if (Visited.count(Reg))
     321          16 :       continue;
     322             : 
     323         446 :     Visited.insert(Reg);
     324             : 
     325         446 :     MachineInstr *DefInstr = MRI.getVRegDef(Reg);
     326         892 :     switch (DefInstr->getOpcode()) {
     327             :     default:
     328             :       break;
     329             :     case AMDGPU::SI_BREAK:
     330             :     case AMDGPU::SI_IF_BREAK:
     331             :     case AMDGPU::SI_ELSE_BREAK:
     332         105 :       return true;
     333          60 :     case AMDGPU::PHI:
     334          60 :       if (phiHasBreakDef(*DefInstr, MRI, Visited))
     335             :         return true;
     336             :     }
     337             :   }
     338             :   return false;
     339             : }
     340             : 
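                      : // Returns true if any terminator of MBB writes EXEC, i.e. the block ends in
                      : // a divergent branch.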
     341         638 : static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
     342             :                                           const TargetRegisterInfo &TRI) {
     343         638 :   for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
     344        1922 :        E = MBB.end(); I != E; ++I) {
     345        1640 :     if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
     346         174 :       return true;
     347             :   }
     348         464 :   return false;
     349             : }
     350             : 
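                      : // Checks whether Copy merely copies an immediate materialized by MoveImm.
                      : // If so, reports the matching scalar move opcode in SMovOp and the value in
                      : // Imm so the caller can rewrite the copy as a scalar move.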
     351       28413 : static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
     352             :                                     const MachineInstr *MoveImm,
     353             :                                     const SIInstrInfo *TII,
     354             :                                     unsigned &SMovOp,
     355             :                                     int64_t &Imm) {
     356       56826 :   if (Copy->getOpcode() != AMDGPU::COPY)
     357             :     return false;
     358             : 
     359       28405 :   if (!MoveImm->isMoveImmediate())
     360             :     return false;
     361             : 
     362         366 :   const MachineOperand *ImmOp =
     363             :       TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
     364         366 :   if (!ImmOp->isImm())
     365             :     return false;
     366             : 
     367             :   // FIXME: Handle copies with sub-regs.
     368           0 :   if (Copy->getOperand(0).getSubReg())
     369             :     return false;
     370             : 
     371           0 :   switch (MoveImm->getOpcode()) {
     372             :   default:
     373             :     return false;
     374           0 :   case AMDGPU::V_MOV_B32_e32:
     375           0 :     SMovOp = AMDGPU::S_MOV_B32;
     376           0 :     break;
     377           0 :   case AMDGPU::V_MOV_B64_PSEUDO:
     378           0 :     SMovOp = AMDGPU::S_MOV_B64;
     379           0 :     break;
     380             :   }
     381           0 :   Imm = ImmOp->getImm();
     382           0 :   return true;
     383             : }
     384             : 
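                      : // Walks the predecessors of MBB transitively, stopping at CutOff if given,
                      : // and returns true if Predicate holds for any visited block.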
     385             : template <class UnaryPredicate>
     386         776 : bool searchPredecessors(const MachineBasicBlock *MBB,
     387             :                         const MachineBasicBlock *CutOff,
     388             :                         UnaryPredicate Predicate) {
     389         776 :   if (MBB == CutOff)
     390             :     return false;
     391             : 
     392         648 :   DenseSet<const MachineBasicBlock *> Visited;
     393        1944 :   SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
     394             :                                                MBB->pred_end());
     395             : 
     396        1362 :   while (!Worklist.empty()) {
     397         888 :     MachineBasicBlock *MBB = Worklist.pop_back_val();
     398             : 
     399        1776 :     if (!Visited.insert(MBB).second)
     400         184 :       continue;
     401         704 :     if (MBB == CutOff)
     402          56 :       continue;
     403         648 :     if (Predicate(MBB))
     404             :       return true;
     405             : 
     406         948 :     Worklist.append(MBB->pred_begin(), MBB->pred_end());
     407             :   }
     408             : 
     409             :   return false;
     410             : }
     411             : 
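                      : // Returns true if any (transitive) predecessor of MBB ends in a terminator
                      : // that modifies EXEC, i.e. in a divergent branch.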
     412             : static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
     413             :                                         const TargetRegisterInfo *TRI) {
     414        1230 :   return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
     415        1230 :            return hasTerminatorThatModifiesExec(*MBB, *TRI); });
     416             : }
     417             : 
      418             : // Checks if there is a potential path from the From instruction to the To
      419             : // instruction. If CutOff is specified and sits on that path, we ignore the
      420             : // portion of the path above it and report To as not reachable.
     421         264 : static bool isReachable(const MachineInstr *From,
     422             :                         const MachineInstr *To,
     423             :                         const MachineBasicBlock *CutOff,
     424             :                         MachineDominatorTree &MDT) {
     425             :   // If either From block dominates To block or instructions are in the same
     426             :   // block and From is higher.
     427         264 :   if (MDT.dominates(From, To))
     428             :     return true;
     429             : 
     430         203 :   const MachineBasicBlock *MBBFrom = From->getParent();
     431         203 :   const MachineBasicBlock *MBBTo = To->getParent();
     432         203 :   if (MBBFrom == MBBTo)
     433             :     return false;
     434             : 
     435             :   // Instructions are in different blocks, do predecessor search.
     436             :   // We should almost never get here since we do not usually produce M0 stores
     437             :   // other than -1.
     438         184 :   return searchPredecessors(MBBTo, CutOff, [MBBFrom]
     439         194 :            (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
     440             : }
     441             : 
     442             : // Hoist and merge identical SGPR initializations into a common predecessor.
     443             : // This is intended to combine M0 initializations, but can work with any
     444             : // SGPR. A VGPR cannot be processed since we cannot guarantee vector
      445             : // execution.
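                      : //
                      : // For example (an illustrative sketch), two identical inits such as
                      : //
                      : //   bb.1: %m0 = S_MOV_B32 -1
                      : //   bb.2: %m0 = S_MOV_B32 -1
                      : //
                      : // can be merged into a single init hoisted to a common dominator block,
                      : // provided no interfering definition of the register sits on the paths
                      : // between them.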
     446           1 : static bool hoistAndMergeSGPRInits(unsigned Reg,
     447             :                                    const MachineRegisterInfo &MRI,
     448             :                                    MachineDominatorTree &MDT) {
     449             :   // List of inits by immediate value.
     450             :   using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
     451           2 :   InitListMap Inits;
     452             :   // List of clobbering instructions.
     453           2 :   SmallVector<MachineInstr*, 8> Clobbers;
     454           1 :   bool Changed = false;
     455             : 
     456          36 :   for (auto &MI : MRI.def_instructions(Reg)) {
     457          17 :     MachineOperand *Imm = nullptr;
     458          47 :     for (auto &MO: MI.operands()) {
     459          79 :       if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
     460          77 :           (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
     461             :         Imm = nullptr;
     462             :         break;
     463          30 :       } else if (MO.isImm())
     464          15 :         Imm = &MO;
     465             :     }
     466          17 :     if (Imm)
     467          30 :       Inits[Imm->getImm()].push_front(&MI);
     468             :     else
     469           2 :       Clobbers.push_back(&MI);
     470             :   }
     471             : 
     472           7 :   for (auto &Init : Inits) {
     473           4 :     auto &Defs = Init.second;
     474             : 
     475          12 :     for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
     476          11 :       MachineInstr *MI1 = *I1;
     477             : 
     478          30 :       for (auto I2 = std::next(I1); I2 != E; ) {
     479          20 :         MachineInstr *MI2 = *I2;
     480             : 
     481             :         // Check any possible interference
     482             :         auto intereferes = [&](MachineBasicBlock::iterator From,
     483          21 :                                MachineBasicBlock::iterator To) -> bool {
     484             : 
     485             :           assert(MDT.dominates(&*To, &*From));
     486             : 
     487         448 :           auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
     488         264 :             const MachineBasicBlock *MBBFrom = From->getParent();
     489         264 :             const MachineBasicBlock *MBBTo = To->getParent();
     490         264 :             bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
     491         264 :             bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
     492         132 :             if (!MayClobberFrom && !MayClobberTo)
     493             :               return false;
     494          36 :             if ((MayClobberFrom && !MayClobberTo) ||
     495             :                 (!MayClobberFrom && MayClobberTo))
     496             :               return true;
      497             :             // Both can clobber; this is not an interference only if both are
      498             :             // dominated by Clobber and belong to the same block, or if Clobber
      499             :             // properly dominates To (given that To >> From, it then dominates
      500             :             // both and is located in a common dominator).
     501          50 :             return !((MBBFrom == MBBTo &&
     502          18 :                       MDT.dominates(Clobber, &*From) &&
     503          12 :                       MDT.dominates(Clobber, &*To)) ||
     504          38 :                      MDT.properlyDominates(Clobber->getParent(), MBBTo));
     505          21 :           };
     506             : 
     507          60 :           return (llvm::any_of(Clobbers, interferes)) ||
     508          74 :                  (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
     509         122 :                     return C.first != Init.first &&
     510          48 :                            llvm::any_of(C.second, interferes);
     511          77 :                   }));
     512          20 :         };
     513             : 
     514          20 :         if (MDT.dominates(MI1, MI2)) {
     515          35 :           if (!intereferes(MI2, MI1)) {
     516             :             DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber()
     517             :                          << " " << *MI2);
     518           3 :             MI2->eraseFromParent();
     519           9 :             Defs.erase(I2++);
     520           3 :             Changed = true;
     521           6 :             continue;
     522             :           }
     523           4 :         } else if (MDT.dominates(MI2, MI1)) {
     524           0 :           if (!intereferes(MI1, MI2)) {
     525             :             DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
     526             :                          << " " << *MI1);
     527           0 :             MI1->eraseFromParent();
     528           0 :             Defs.erase(I1++);
     529           0 :             Changed = true;
     530           0 :             break;
     531             :           }
     532             :         } else {
     533           4 :           auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
     534           4 :                                                      MI2->getParent());
     535           4 :           if (!MBB) {
     536           0 :             ++I2;
     537           0 :             continue;
     538             :           }
     539             : 
     540           4 :           MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
     541           5 :           if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
     542             :             DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
     543             :                          << " " << *MI1 << "and moving from BB#"
     544             :                          << MI2->getParent()->getNumber() << " to BB#"
     545             :                          << I->getParent()->getNumber() << " " << *MI2);
     546           2 :             I->getParent()->splice(I, MI2->getParent(), MI2);
     547           1 :             MI1->eraseFromParent();
     548           3 :             Defs.erase(I1++);
     549           1 :             Changed = true;
     550           1 :             break;
     551             :           }
     552             :         }
     553          16 :         ++I2;
     554             :       }
     555             :       ++I1;
     556             :     }
     557             :   }
     558             : 
     559           1 :   if (Changed)
     560           1 :     MRI.clearKillFlags(Reg);
     561             : 
     562           2 :   return Changed;
     563             : }
     564             : 
     565       14818 : bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
     566       14818 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     567       14818 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     568       14818 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     569       14818 :   const SIInstrInfo *TII = ST.getInstrInfo();
     570       14818 :   MDT = &getAnalysis<MachineDominatorTree>();
     571             : 
     572       29636 :   SmallVector<MachineInstr *, 16> Worklist;
     573             : 
     574       29636 :   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
     575       31634 :                                                   BI != BE; ++BI) {
     576       16816 :     MachineBasicBlock &MBB = *BI;
     577       33632 :     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
     578      505993 :          I != E; ++I) {
     579      489177 :       MachineInstr &MI = *I;
     580             : 
     581      489177 :       switch (MI.getOpcode()) {
     582      239612 :       default:
     583      239612 :         continue;
     584      199709 :       case AMDGPU::COPY:
     585             :       case AMDGPU::WQM:
     586             :       case AMDGPU::WWM: {
     587             :         // If the destination register is a physical register there isn't really
     588             :         // much we can do to fix this.
     589      399418 :         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
     590        4879 :           continue;
     591             : 
     592             :         const TargetRegisterClass *SrcRC, *DstRC;
     593      584490 :         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
     594      194830 :         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
     595       28570 :           unsigned SrcReg = MI.getOperand(1).getReg();
     596       28570 :           if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
     597         157 :             TII->moveToVALU(MI);
     598         157 :             break;
     599             :           }
     600             : 
     601       28413 :           MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
     602             :           unsigned SMovOp;
     603             :           int64_t Imm;
     604             :           // If we are just copying an immediate, we can replace the copy with
     605             :           // s_mov_b32.
     606       28413 :           if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
     607           0 :             MI.getOperand(1).ChangeToImmediate(Imm);
     608           0 :             MI.addImplicitDefUseOperands(MF);
     609           0 :             MI.setDesc(TII->get(SMovOp));
     610             :             break;
     611             :           }
     612       28413 :           TII->moveToVALU(MI);
     613      166260 :         } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
     614       27997 :           tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
     615             :         }
     616             : 
     617             :         break;
     618             :       }
     619        3097 :       case AMDGPU::PHI: {
     620        3097 :         unsigned Reg = MI.getOperand(0).getReg();
     621        6194 :         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
     622             :           break;
     623             : 
     624             :         // We don't need to fix the PHI if the common dominator of the
     625             :         // two incoming blocks terminates with a uniform branch.
     626         332 :         bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
     627         332 :         if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
     628         317 :           MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
     629         317 :           MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
     630             : 
     631         592 :           if (!predsHasDivergentTerminator(MBB0, TRI) &&
     632         275 :               !predsHasDivergentTerminator(MBB1, TRI)) {
     633             :             DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
     634             :             break;
     635             :           }
     636             :         }
     637             : 
     638             :         // If a PHI node defines an SGPR and any of its operands are VGPRs,
     639             :         // then we need to move it to the VALU.
     640             :         //
     641             :         // Also, if a PHI node defines an SGPR and has all SGPR operands
     642             :         // we must move it to the VALU, because the SGPR operands will
     643             :         // all end up being assigned the same register, which means
     644             :         // there is a potential for a conflict if different threads take
     645             :         // different control flow paths.
     646             :         //
     647             :         // For Example:
     648             :         //
     649             :         // sgpr0 = def;
     650             :         // ...
     651             :         // sgpr1 = def;
     652             :         // ...
     653             :         // sgpr2 = PHI sgpr0, sgpr1
     654             :         // use sgpr2;
     655             :         //
     656             :         // Will Become:
     657             :         //
     658             :         // sgpr2 = def;
     659             :         // ...
     660             :         // sgpr2 = def;
     661             :         // ...
     662             :         // use sgpr2
     663             :         //
     664             :         // The one exception to this rule is when one of the operands
     665             :         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
     666             :         // instruction.  In this case, there we know the program will
     667             :         // never enter the second block (the loop) without entering
     668             :         // the first block (where the condition is computed), so there
     669             :         // is no chance for values to be over-written.
     670             : 
     671         378 :         SmallSet<unsigned, 8> Visited;
     672         189 :         if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
     673             :           DEBUG(dbgs() << "Fixing PHI: " << MI);
     674         116 :           TII->moveToVALU(MI);
     675             :         }
     676             :         break;
     677             :       }
     678       46716 :       case AMDGPU::REG_SEQUENCE:
     679      116614 :         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
     680       26007 :             !hasVGPROperands(MI, TRI)) {
     681       43891 :           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
     682       43891 :           continue;
     683             :         }
     684             : 
     685             :         DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
     686             : 
     687        2825 :         TII->moveToVALU(MI);
     688        2825 :         break;
     689          43 :       case AMDGPU::INSERT_SUBREG: {
     690             :         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
     691          86 :         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
     692          86 :         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
     693          86 :         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
     694          54 :         if (TRI->isSGPRClass(DstRC) &&
     695          22 :             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
     696             :           DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
     697           2 :           TII->moveToVALU(MI);
     698             :         }
     699             :         break;
     700      239612 :       }
     701             :       }
     702             :     }
     703             :   }
     704             : 
     705       29460 :   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
     706           1 :     hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
     707             : 
     708       29636 :   return true;
     709      216918 : }

Generated by: LCOV version 1.13