LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFixSGPRCopies.cpp (source / functions)
Test: llvm-toolchain.info          Lines:     215 / 234   (91.9 %)
Date: 2018-07-13 00:08:38          Functions:  25 /  27   (92.6 %)

          Line data    Source code
       1             : //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Copies from VGPR to SGPR registers are illegal and the register coalescer
      12             : /// will sometimes generate these illegal copies in situations like this:
      13             : ///
      14             : ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
      15             : ///
      16             : /// BB0:
      17             : ///   %0 <sgpr> = SCALAR_INST
      18             : ///   %1 <vsrc> = COPY %0 <sgpr>
      19             : ///    ...
      20             : ///    BRANCH %cond BB1, BB2
      21             : /// BB1:
      22             : ///    %2 <vgpr> = VECTOR_INST
      23             : ///    %3 <vsrc> = COPY %2 <vgpr>
      24             : /// BB2:
      25             : ///    %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1>
      26             : ///    %5 <vgpr> = VECTOR_INST %4 <vsrc>
      27             : ///
      28             : ///
      29             : /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
      30             : /// code will look like this:
      31             : ///
      32             : /// BB0:
      33             : ///   %0 <sgpr> = SCALAR_INST
      34             : ///    ...
      35             : ///    BRANCH %cond BB1, BB2
      36             : /// BB1:
      37             : ///   %2 <vgpr> = VECTOR_INST
      38             : ///   %3 <vsrc> = COPY %2 <vgpr>
      39             : /// BB2:
      40             : ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1>
      41             : ///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
      42             : ///
      43             : /// Now that the result of the PHI instruction is an SGPR, the register
      44             : /// allocator is forced to constrain the register class of %3 to
      45             : /// <sgpr>, so we end up with final code like this:
      46             : ///
      47             : /// BB0:
      48             : ///   %0 <sgpr> = SCALAR_INST
      49             : ///    ...
      50             : ///    BRANCH %cond BB1, BB2
      51             : /// BB1:
      52             : ///   %2 <vgpr> = VECTOR_INST
      53             : ///   %3 <sgpr> = COPY %2 <vgpr>
      54             : /// BB2:
      55             : ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1>
      56             : ///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
      57             : ///
      58             : /// Now this code contains an illegal copy from a VGPR to an SGPR.
      59             : ///
      60             : /// In order to avoid this problem, this pass searches for PHI instructions
      61             : /// that define a <vsrc> register and constrains their definition class to
      62             : /// <vgpr> if a user of the PHI's definition register is a vector instruction.
      63             : /// If the PHI's definition class is constrained to <vgpr>, then the coalescer
      64             : /// will be unable to perform the COPY removal from the above example, which
      65             : /// ultimately led to the creation of an illegal COPY.
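                      : ///
                      : /// As an editorial sketch (not part of the pass), the constraining step
                      : /// described above amounts to re-classing the PHI's definition with the
                      : /// same MRI/TRI APIs this file already uses; the helper name is
                      : /// hypothetical:
                      : ///
                      : ///   static void constrainDefToVGPR(MachineInstr &PHI,
                      : ///                                  MachineRegisterInfo &MRI,
                      : ///                                  const SIRegisterInfo *TRI) {
                      : ///     unsigned Reg = PHI.getOperand(0).getReg();
                      : ///     const TargetRegisterClass *RC = MRI.getRegClass(Reg);
                      : ///     if (!TRI->hasVGPRs(RC))
                      : ///       MRI.setRegClass(Reg, TRI->getEquivalentVGPRClass(RC));
                      : ///   }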
      66             : //===----------------------------------------------------------------------===//
      67             : 
      68             : #include "AMDGPU.h"
      69             : #include "AMDGPUSubtarget.h"
      70             : #include "SIInstrInfo.h"
      71             : #include "SIRegisterInfo.h"
      72             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      73             : #include "llvm/ADT/DenseSet.h"
      74             : #include "llvm/ADT/STLExtras.h"
      75             : #include "llvm/ADT/SmallSet.h"
      76             : #include "llvm/ADT/SmallVector.h"
      77             : #include "llvm/CodeGen/MachineBasicBlock.h"
      78             : #include "llvm/CodeGen/MachineDominators.h"
      79             : #include "llvm/CodeGen/MachineFunction.h"
      80             : #include "llvm/CodeGen/MachineFunctionPass.h"
      81             : #include "llvm/CodeGen/MachineInstr.h"
      82             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      83             : #include "llvm/CodeGen/MachineOperand.h"
      84             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      85             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      86             : #include "llvm/Pass.h"
      87             : #include "llvm/Support/CodeGen.h"
      88             : #include "llvm/Support/CommandLine.h"
      89             : #include "llvm/Support/Debug.h"
      90             : #include "llvm/Support/raw_ostream.h"
      91             : #include "llvm/Target/TargetMachine.h"
      92             : #include <cassert>
      93             : #include <cstdint>
      94             : #include <iterator>
      95             : #include <list>
      96             : #include <map>
      97             : #include <tuple>
      98             : #include <utility>
      99             : 
     100             : using namespace llvm;
     101             : 
     102             : #define DEBUG_TYPE "si-fix-sgpr-copies"
     103             : 
     104       99743 : static cl::opt<bool> EnableM0Merge(
     105             :   "amdgpu-enable-merge-m0",
     106       99743 :   cl::desc("Merge and hoist M0 initializations"),
     107      299229 :   cl::init(false));
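                      : 
                      : // Being a plain cl::opt, the flag above can be toggled on the llc command
                      : // line when the AMDGPU target is built in, e.g. (illustrative invocation):
                      : //
                      : //   llc -march=amdgcn -amdgpu-enable-merge-m0 input.ll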
     108             : 
     109             : namespace {
     110             : 
     111        1783 : class SIFixSGPRCopies : public MachineFunctionPass {
     112             :   MachineDominatorTree *MDT;
     113             : 
     114             : public:
     115             :   static char ID;
     116             : 
     117        1791 :   SIFixSGPRCopies() : MachineFunctionPass(ID) {}
     118             : 
     119             :   bool runOnMachineFunction(MachineFunction &MF) override;
     120             : 
     121           3 :   StringRef getPassName() const override { return "SI Fix SGPR copies"; }
     122             : 
     123        1780 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     124             :     AU.addRequired<MachineDominatorTree>();
     125             :     AU.addPreserved<MachineDominatorTree>();
     126        1780 :     AU.setPreservesCFG();
     127        1780 :     MachineFunctionPass::getAnalysisUsage(AU);
     128        1780 :   }
     129             : };
     130             : 
     131             : } // end anonymous namespace
     132             : 
     133       73254 : INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
     134             :                      "SI Fix SGPR copies", false, false)
     135       73254 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     136      342570 : INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
     137             :                      "SI Fix SGPR copies", false, false)
     138             : 
     139             : char SIFixSGPRCopies::ID = 0;
     140             : 
     141             : char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
     142             : 
     143           0 : FunctionPass *llvm::createSIFixSGPRCopiesPass() {
     144           0 :   return new SIFixSGPRCopies();
     145             : }
     146             : 
     147       39314 : static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
     148       39314 :   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
     149      285706 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     150      642960 :     if (!MI.getOperand(i).isReg() ||
     151      145294 :         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
     152      103539 :       continue;
     153             : 
     154      145294 :     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
     155             :       return true;
     156             :   }
     157             :   return false;
     158             : }
     159             : 
     160             : static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
     161      250934 : getCopyRegClasses(const MachineInstr &Copy,
     162             :                   const SIRegisterInfo &TRI,
     163             :                   const MachineRegisterInfo &MRI) {
     164      250934 :   unsigned DstReg = Copy.getOperand(0).getReg();
     165      250934 :   unsigned SrcReg = Copy.getOperand(1).getReg();
     166             : 
     167             :   const TargetRegisterClass *SrcRC =
     168      283381 :     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
     169             :     MRI.getRegClass(SrcReg) :
     170             :     TRI.getPhysRegClass(SrcReg);
     171             : 
     172             :   // We don't really care about the subregister here.
     173             :   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
     174             : 
     175             :   const TargetRegisterClass *DstRC =
     176      250934 :     TargetRegisterInfo::isVirtualRegister(DstReg) ?
     177             :     MRI.getRegClass(DstReg) :
     178             :     TRI.getPhysRegClass(DstReg);
     179             : 
     180      250934 :   return std::make_pair(SrcRC, DstRC);
     181             : }
     182             : 
     183      245311 : static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
     184             :                              const TargetRegisterClass *DstRC,
     185             :                              const SIRegisterInfo &TRI) {
     186      245311 :   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
     187             : }
     188             : 
     189      217511 : static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
     190             :                              const TargetRegisterClass *DstRC,
     191             :                              const SIRegisterInfo &TRI) {
     192      217511 :   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
     193             : }
     194             : 
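                      : // Editorial summary: if every user of this SGPR->VGPR copy's destination
                      : // lives in the same block and remains operand-legal when handed the SGPR
                      : // source directly, re-class the destination to SGPR; the copy then becomes
                      : // SGPR->SGPR and is trivial to coalesce away.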
     195       40334 : static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
     196             :                                       const SIRegisterInfo *TRI,
     197             :                                       const SIInstrInfo *TII) {
     198       40334 :   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
     199       40334 :   auto &Src = MI.getOperand(1);
     200       40334 :   unsigned DstReg = MI.getOperand(0).getReg();
     201       40334 :   unsigned SrcReg = Src.getReg();
     202       80668 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
     203             :       !TargetRegisterInfo::isVirtualRegister(DstReg))
     204             :     return false;
     205             : 
     206       82502 :   for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
     207       80826 :     const auto *UseMI = MO.getParent();
     208       80826 :     if (UseMI == &MI)
     209       40334 :       continue;
     210       80574 :     if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
     211      116070 :         UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
     212       35496 :         !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
     213             :       return false;
     214             :   }
     215             :   // Change VGPR to SGPR destination.
     216        1676 :   MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
     217        1676 :   return true;
     218             : }
     219             : 
     220             : // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
     221             : //
     222             : // SGPRx = ...
     223             : // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
     224             : // VGPRz = COPY SGPRy
     225             : //
     226             : // ==>
     227             : //
     228             : // VGPRx = COPY SGPRx
     229             : // VGPRz = REG_SEQUENCE VGPRx, sub0
     230             : //
     231             : // This exposes immediate folding opportunities when materializing 64-bit
     232             : // immediates.
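                      : //
                      : // For example (an editorial sketch; opcodes and register classes are
                      : // illustrative):
                      : //
                      : //   %lo:sgpr_32 = S_MOV_B32 0
                      : //   %hi:sgpr_32 = S_MOV_B32 1
                      : //   %s:sgpr_64  = REG_SEQUENCE %lo, sub0, %hi, sub1
                      : //   %v:vreg_64  = COPY %s
                      : //
                      : // After the rewrite, each REG_SEQUENCE input is a per-element COPY of an
                      : // S_MOV_B32, which later folding can turn into V_MOV_B32 inline immediates.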
     233       60573 : static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
     234             :                                         const SIRegisterInfo *TRI,
     235             :                                         const SIInstrInfo *TII,
     236             :                                         MachineRegisterInfo &MRI) {
     237             :   assert(MI.isRegSequence());
     238             : 
     239       60573 :   unsigned DstReg = MI.getOperand(0).getReg();
     240       60573 :   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
     241             :     return false;
     242             : 
     243       36873 :   if (!MRI.hasOneUse(DstReg))
     244             :     return false;
     245             : 
     246       51350 :   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
     247       25675 :   if (!CopyUse.isCopy())
     248             :     return false;
     249             : 
      250             :   // It is illegal to have vreg inputs to a physreg-defining reg_sequence.
     251       11278 :   if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
     252             :     return false;
     253             : 
     254             :   const TargetRegisterClass *SrcRC, *DstRC;
     255       11246 :   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
     256             : 
     257        5623 :   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
     258             :     return false;
     259             : 
     260        5140 :   if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
     261             :     return true;
     262             : 
     263             :   // TODO: Could have multiple extracts?
     264        5093 :   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
     265        5093 :   if (SubReg != AMDGPU::NoSubRegister)
     266             :     return false;
     267             : 
     268        5093 :   MRI.setRegClass(DstReg, DstRC);
     269             : 
     270             :   // SGPRx = ...
     271             :   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
     272             :   // VGPRz = COPY SGPRy
     273             : 
     274             :   // =>
     275             :   // VGPRx = COPY SGPRx
     276             :   // VGPRz = REG_SEQUENCE VGPRx, sub0
     277             : 
     278        5093 :   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
     279             : 
     280       18757 :   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
     281       27328 :     unsigned SrcReg = MI.getOperand(I).getReg();
     282             :     unsigned SrcSubReg = MI.getOperand(I).getSubReg();
     283             : 
     284             :     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
     285             :     assert(TRI->isSGPRClass(SrcRC) &&
     286             :            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
     287             : 
     288       13664 :     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
     289       13664 :     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
     290             : 
     291       13664 :     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
     292             : 
     293       13664 :     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
     294       13664 :             TmpReg)
     295       13664 :         .add(MI.getOperand(I));
     296             : 
     297       27328 :     MI.getOperand(I).setReg(TmpReg);
     298             :   }
     299             : 
     300        5093 :   CopyUse.eraseFromParent();
     301        5093 :   return true;
     302             : }
     303             : 
     304         412 : static bool phiHasVGPROperands(const MachineInstr &PHI,
     305             :                                const MachineRegisterInfo &MRI,
     306             :                                const SIRegisterInfo *TRI,
     307             :                                const SIInstrInfo *TII) {
     308        2094 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     309        1690 :     unsigned Reg = PHI.getOperand(i).getReg();
     310         845 :     if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
     311             :       return true;
     312             :   }
     313             :   return false;
     314             : }
     315             : 
     316         342 : static bool phiHasBreakDef(const MachineInstr &PHI,
     317             :                            const MachineRegisterInfo &MRI,
     318             :                            SmallSet<unsigned, 8> &Visited) {
     319        1306 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     320        1268 :     unsigned Reg = PHI.getOperand(i).getReg();
     321         634 :     if (Visited.count(Reg))
     322          28 :       continue;
     323             : 
     324         606 :     Visited.insert(Reg);
     325             : 
     326         606 :     MachineInstr *DefInstr = MRI.getVRegDef(Reg);
     327        1212 :     switch (DefInstr->getOpcode()) {
     328             :     default:
     329             :       break;
     330             :     case AMDGPU::SI_BREAK:
     331             :     case AMDGPU::SI_IF_BREAK:
     332             :     case AMDGPU::SI_ELSE_BREAK:
     333         152 :       return true;
     334         100 :     case AMDGPU::PHI:
     335         100 :       if (phiHasBreakDef(*DefInstr, MRI, Visited))
     336             :         return true;
     337             :     }
     338             :   }
     339             :   return false;
     340             : }
     341             : 
     342         757 : static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
     343             :                                           const TargetRegisterInfo &TRI) {
     344         757 :   for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
     345        1469 :        E = MBB.end(); I != E; ++I) {
     346         943 :     if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
     347         231 :       return true;
     348             :   }
     349         526 :   return false;
     350             : }
     351             : 
     352       33254 : static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
     353             :                                     const MachineInstr *MoveImm,
     354             :                                     const SIInstrInfo *TII,
     355             :                                     unsigned &SMovOp,
     356             :                                     int64_t &Imm) {
     357       66508 :   if (Copy->getOpcode() != AMDGPU::COPY)
     358             :     return false;
     359             : 
     360       33244 :   if (!MoveImm->isMoveImmediate())
     361             :     return false;
     362             : 
     363             :   const MachineOperand *ImmOp =
     364             :       TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
     365         340 :   if (!ImmOp->isImm())
     366             :     return false;
     367             : 
     368             :   // FIXME: Handle copies with sub-regs.
     369           0 :   if (Copy->getOperand(0).getSubReg())
     370             :     return false;
     371             : 
     372           0 :   switch (MoveImm->getOpcode()) {
     373             :   default:
     374             :     return false;
     375           0 :   case AMDGPU::V_MOV_B32_e32:
     376           0 :     SMovOp = AMDGPU::S_MOV_B32;
     377           0 :     break;
     378           0 :   case AMDGPU::V_MOV_B64_PSEUDO:
     379           0 :     SMovOp = AMDGPU::S_MOV_B64;
     380           0 :     break;
     381             :   }
     382           0 :   Imm = ImmOp->getImm();
     383           0 :   return true;
     384             : }
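                      : 
                      : // When the check above succeeds, the caller rewrites the copy in place,
                      : // roughly as follows (an editorial before/after sketch):
                      : //
                      : //   %v:vgpr_32 = V_MOV_B32_e32 7
                      : //   %s:sgpr_32 = COPY %v
                      : // ==>
                      : //   %s:sgpr_32 = S_MOV_B32 7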
     385             : 
     386             : template <class UnaryPredicate>
     387         908 : bool searchPredecessors(const MachineBasicBlock *MBB,
     388             :                         const MachineBasicBlock *CutOff,
     389             :                         UnaryPredicate Predicate) {
     390         908 :   if (MBB == CutOff)
     391             :     return false;
     392             : 
     393             :   DenseSet<const MachineBasicBlock *> Visited;
     394             :   SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
     395             :                                                MBB->pred_end());
     396             : 
     397        1594 :   while (!Worklist.empty()) {
     398             :     MachineBasicBlock *MBB = Worklist.pop_back_val();
     399             : 
     400        2090 :     if (!Visited.insert(MBB).second)
     401         222 :       continue;
     402         823 :     if (MBB == CutOff)
     403          56 :       continue;
     404         767 :     if (Predicate(MBB))
     405             :       return true;
     406             : 
     407         536 :     Worklist.append(MBB->pred_begin(), MBB->pred_end());
     408             :   }
     409             : 
     410             :   return false;
     411             : }
     412             : 
     413             : static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
     414             :                                         const TargetRegisterInfo *TRI) {
     415        1481 :   return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
     416        1481 :            return hasTerminatorThatModifiesExec(*MBB, *TRI); });
     417             : }
     418             : 
      419             : // Checks if there is a potential path from instruction From to instruction To.
      420             : // If CutOff is specified and sits somewhere along that path, we ignore the
      421             : // portion of the path above it and report the destination as not reachable.
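                      : // For example (editorial): if blocks X and Y both branch to block C, From is
                      : // in X, To is in C, and CutOff == X, the predecessor search stops at X and
                      : // From is reported as unreachable even though an X->C path exists.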
     422         264 : static bool isReachable(const MachineInstr *From,
     423             :                         const MachineInstr *To,
     424             :                         const MachineBasicBlock *CutOff,
     425             :                         MachineDominatorTree &MDT) {
     426             :   // If either From block dominates To block or instructions are in the same
     427             :   // block and From is higher.
     428         264 :   if (MDT.dominates(From, To))
     429             :     return true;
     430             : 
     431         203 :   const MachineBasicBlock *MBBFrom = From->getParent();
     432         203 :   const MachineBasicBlock *MBBTo = To->getParent();
     433         203 :   if (MBBFrom == MBBTo)
     434             :     return false;
     435             : 
     436             :   // Instructions are in different blocks, do predecessor search.
     437             :   // We should almost never get here since we do not usually produce M0 stores
     438             :   // other than -1.
     439         184 :   return searchPredecessors(MBBTo, CutOff, [MBBFrom]
     440         194 :            (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
     441             : }
     442             : 
     443             : // Hoist and merge identical SGPR initializations into a common predecessor.
     444             : // This is intended to combine M0 initializations, but can work with any
     445             : // SGPR. A VGPR cannot be processed since we cannot guarantee vector
      446             : // execution.
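                      : //
                      : // For example (an editorial sketch): if two blocks each contain
                      : //
                      : //   $m0 = S_MOV_B32 -1
                      : //
                      : // one init is erased; when neither block dominates the other, the surviving
                      : // init is moved to their nearest common dominator, provided no clobbering
                      : // def of M0 interferes along the way.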
     447           1 : static bool hoistAndMergeSGPRInits(unsigned Reg,
     448             :                                    const MachineRegisterInfo &MRI,
     449             :                                    MachineDominatorTree &MDT) {
     450             :   // List of inits by immediate value.
     451             :   using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
     452             :   InitListMap Inits;
     453             :   // List of clobbering instructions.
     454             :   SmallVector<MachineInstr*, 8> Clobbers;
     455             :   bool Changed = false;
     456             : 
     457          18 :   for (auto &MI : MRI.def_instructions(Reg)) {
     458             :     MachineOperand *Imm = nullptr;
     459          77 :     for (auto &MO: MI.operands()) {
     460          17 :       if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
     461          47 :           (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
     462             :         Imm = nullptr;
     463             :         break;
     464          30 :       } else if (MO.isImm())
     465             :         Imm = &MO;
     466             :     }
     467          17 :     if (Imm)
     468          30 :       Inits[Imm->getImm()].push_front(&MI);
     469             :     else
     470           2 :       Clobbers.push_back(&MI);
     471             :   }
     472             : 
     473           5 :   for (auto &Init : Inits) {
     474             :     auto &Defs = Init.second;
     475             : 
     476          19 :     for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
     477          11 :       MachineInstr *MI1 = *I1;
     478             : 
     479          30 :       for (auto I2 = std::next(I1); I2 != E; ) {
     480          20 :         MachineInstr *MI2 = *I2;
     481             : 
     482             :         // Check any possible interference
      483             :         auto interferes = [&](MachineBasicBlock::iterator From,
      484          21 :                               MachineBasicBlock::iterator To) -> bool {
     485             : 
     486             :           assert(MDT.dominates(&*To, &*From));
     487             : 
     488         448 :           auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
     489         132 :             const MachineBasicBlock *MBBFrom = From->getParent();
     490         132 :             const MachineBasicBlock *MBBTo = To->getParent();
     491         132 :             bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
     492         132 :             bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
     493         132 :             if (!MayClobberFrom && !MayClobberTo)
     494             :               return false;
     495          36 :             if ((MayClobberFrom && !MayClobberTo) ||
     496             :                 (!MayClobberFrom && MayClobberTo))
     497             :               return true;
      498             :             // Both can clobber. This is not an interference only if both are
      499             :             // dominated by Clobber and belong to the same block, or if Clobber
      500             :             // properly dominates To; given that To >> From, it then dominates
      501             :             // both and is located in a common dominator.
     502          50 :             return !((MBBFrom == MBBTo &&
     503          12 :                       MDT.dominates(Clobber, &*From) &&
     504           6 :                       MDT.dominates(Clobber, &*To)) ||
     505          19 :                      MDT.properlyDominates(Clobber->getParent(), MBBTo));
     506          21 :           };
     507             : 
     508          60 :           return (llvm::any_of(Clobbers, interferes)) ||
     509          74 :                  (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
     510         122 :                     return C.first != Init.first &&
     511          48 :                            llvm::any_of(C.second, interferes);
     512          77 :                   }));
     513          20 :         };
     514             : 
     515          20 :         if (MDT.dominates(MI1, MI2)) {
      516          19 :           if (!interferes(MI2, MI1)) {
     517             :             LLVM_DEBUG(dbgs()
     518             :                        << "Erasing from "
     519             :                        << printMBBReference(*MI2->getParent()) << " " << *MI2);
     520           3 :             MI2->eraseFromParent();
     521             :             Defs.erase(I2++);
     522             :             Changed = true;
     523           6 :             continue;
     524             :           }
     525           4 :         } else if (MDT.dominates(MI2, MI1)) {
      526           0 :           if (!interferes(MI1, MI2)) {
     527             :             LLVM_DEBUG(dbgs()
     528             :                        << "Erasing from "
     529             :                        << printMBBReference(*MI1->getParent()) << " " << *MI1);
     530           0 :             MI1->eraseFromParent();
     531             :             Defs.erase(I1++);
     532             :             Changed = true;
     533           0 :             break;
     534             :           }
     535             :         } else {
     536           4 :           auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
     537             :                                                      MI2->getParent());
     538           4 :           if (!MBB) {
     539             :             ++I2;
     540           0 :             continue;
     541             :           }
     542             : 
     543           4 :           MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
      544           5 :           if (!interferes(MI1, I) && !interferes(MI2, I)) {
     545             :             LLVM_DEBUG(dbgs()
     546             :                        << "Erasing from "
     547             :                        << printMBBReference(*MI1->getParent()) << " " << *MI1
     548             :                        << "and moving from "
     549             :                        << printMBBReference(*MI2->getParent()) << " to "
     550             :                        << printMBBReference(*I->getParent()) << " " << *MI2);
     551           2 :             I->getParent()->splice(I, MI2->getParent(), MI2);
     552           1 :             MI1->eraseFromParent();
     553             :             Defs.erase(I1++);
     554             :             Changed = true;
     555           1 :             break;
     556             :           }
     557             :         }
     558             :         ++I2;
     559             :       }
     560             :       ++I1;
     561             :     }
     562             :   }
     563             : 
     564           1 :   if (Changed)
     565           1 :     MRI.clearKillFlags(Reg);
     566             : 
     567           1 :   return Changed;
     568             : }
     569             : 
     570       17856 : bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
     571       17856 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     572       17856 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     573             :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     574             :   const SIInstrInfo *TII = ST.getInstrInfo();
     575       17856 :   MDT = &getAnalysis<MachineDominatorTree>();
     576             : 
     577             :   SmallVector<MachineInstr *, 16> Worklist;
     578             : 
     579             :   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
     580       37913 :                                                   BI != BE; ++BI) {
     581             :     MachineBasicBlock &MBB = *BI;
     582       20057 :     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
     583      598725 :          I != E; ++I) {
     584             :       MachineInstr &MI = *I;
     585             : 
     586     1157336 :       switch (MI.getOpcode()) {
     587      259605 :       default:
     588      259605 :         continue;
     589      252738 :       case AMDGPU::COPY:
     590             :       case AMDGPU::WQM:
     591             :       case AMDGPU::WWM: {
      592             :         // If the destination register is a physical register, there isn't really
     593             :         // much we can do to fix this.
     594      505476 :         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
     595        7427 :           continue;
     596             : 
     597             :         const TargetRegisterClass *SrcRC, *DstRC;
     598      490622 :         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
     599      245311 :         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
     600       33423 :           unsigned SrcReg = MI.getOperand(1).getReg();
     601       33423 :           if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
     602         169 :             TII->moveToVALU(MI);
     603         169 :             break;
     604             :           }
     605             : 
     606       33254 :           MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
     607             :           unsigned SMovOp;
     608             :           int64_t Imm;
     609             :           // If we are just copying an immediate, we can replace the copy with
     610             :           // s_mov_b32.
     611       33254 :           if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
     612           0 :             MI.getOperand(1).ChangeToImmediate(Imm);
     613           0 :             MI.addImplicitDefUseOperands(MF);
     614           0 :             MI.setDesc(TII->get(SMovOp));
     615             :             break;
     616             :           }
     617       33254 :           TII->moveToVALU(MI);
     618      211888 :         } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
     619       35194 :           tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
     620             :         }
     621             : 
     622             :         break;
     623             :       }
     624        3281 :       case AMDGPU::PHI: {
     625        3281 :         unsigned Reg = MI.getOperand(0).getReg();
     626        3281 :         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
     627             :           break;
     628             : 
     629             :         // We don't need to fix the PHI if the common dominator of the
     630             :         // two incoming blocks terminates with a uniform branch.
     631         412 :         bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
     632         412 :         if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
     633         397 :           MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
     634         397 :           MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
     635             : 
     636         724 :           if (!predsHasDivergentTerminator(MBB0, TRI) &&
     637             :               !predsHasDivergentTerminator(MBB1, TRI)) {
     638             :             LLVM_DEBUG(dbgs()
     639             :                        << "Not fixing PHI for uniform branch: " << MI << '\n');
     640             :             break;
     641             :           }
     642             :         }
     643             : 
     644             :         // If a PHI node defines an SGPR and any of its operands are VGPRs,
     645             :         // then we need to move it to the VALU.
     646             :         //
     647             :         // Also, if a PHI node defines an SGPR and has all SGPR operands
     648             :         // we must move it to the VALU, because the SGPR operands will
     649             :         // all end up being assigned the same register, which means
     650             :         // there is a potential for a conflict if different threads take
     651             :         // different control flow paths.
     652             :         //
     653             :         // For Example:
     654             :         //
     655             :         // sgpr0 = def;
     656             :         // ...
     657             :         // sgpr1 = def;
     658             :         // ...
     659             :         // sgpr2 = PHI sgpr0, sgpr1
     660             :         // use sgpr2;
     661             :         //
     662             :         // Will Become:
     663             :         //
     664             :         // sgpr2 = def;
     665             :         // ...
     666             :         // sgpr2 = def;
     667             :         // ...
     668             :         // use sgpr2
     669             :         //
     670             :         // The one exception to this rule is when one of the operands
     671             :         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
      672             :         // instruction. In this case, we know the program will
      673             :         // never enter the second block (the loop) without entering
      674             :         // the first block (where the condition is computed), so there
      675             :         // is no chance for values to be overwritten.
     676             : 
     677         246 :         SmallSet<unsigned, 8> Visited;
     678         246 :         if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
     679             :           LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
     680         142 :           TII->moveToVALU(MI);
     681             :         }
     682             :         break;
     683             :       }
     684       63014 :       case AMDGPU::REG_SEQUENCE:
     685      162901 :         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
     686       39314 :             !hasVGPROperands(MI, TRI)) {
     687       60573 :           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
     688       60573 :           continue;
     689             :         }
     690             : 
     691             :         LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
     692             : 
     693        2441 :         TII->moveToVALU(MI);
     694        2441 :         break;
     695          30 :       case AMDGPU::INSERT_SUBREG: {
     696             :         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
     697          30 :         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
     698          30 :         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
     699          30 :         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
     700          42 :         if (TRI->isSGPRClass(DstRC) &&
     701          24 :             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
     702             :           LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
     703           2 :           TII->moveToVALU(MI);
     704             :         }
     705             :         break;
     706      259605 :       }
     707             :       }
     708             :     }
     709             :   }
     710             : 
     711       35531 :   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
     712           1 :     hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
     713             : 
     714       17856 :   return true;
     715      299229 : }

Generated by: LCOV version 1.13