LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFixSGPRCopies.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 210 224 93.8 %
Date: 2018-10-20 13:21:21 Functions: 18 20 90.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Copies from VGPR to SGPR registers are illegal and the register coalescer
      12             : /// will sometimes generate these illegal copies in situations like this:
      13             : ///
      14             : ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
      15             : ///
      16             : /// BB0:
      17             : ///   %0 <sgpr> = SCALAR_INST
      18             : ///   %1 <vsrc> = COPY %0 <sgpr>
      19             : ///    ...
      20             : ///    BRANCH %cond BB1, BB2
      21             : ///  BB1:
      22             : ///    %2 <vgpr> = VECTOR_INST
      23             : ///    %3 <vsrc> = COPY %2 <vgpr>
      24             : ///  BB2:
///    %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1>
      26             : ///    %5 <vgpr> = VECTOR_INST %4 <vsrc>
      27             : ///
      28             : ///
      29             : /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
      30             : /// code will look like this:
      31             : ///
      32             : /// BB0:
      33             : ///   %0 <sgpr> = SCALAR_INST
      34             : ///    ...
      35             : ///    BRANCH %cond BB1, BB2
      36             : /// BB1:
      37             : ///   %2 <vgpr> = VECTOR_INST
      38             : ///   %3 <vsrc> = COPY %2 <vgpr>
      39             : /// BB2:
      40             : ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1>
      41             : ///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
      42             : ///
      43             : /// Now that the result of the PHI instruction is an SGPR, the register
      44             : /// allocator is now forced to constrain the register class of %3 to
      45             : /// <sgpr> so we end up with final code like this:
      46             : ///
      47             : /// BB0:
      48             : ///   %0 <sgpr> = SCALAR_INST
      49             : ///    ...
      50             : ///    BRANCH %cond BB1, BB2
      51             : /// BB1:
      52             : ///   %2 <vgpr> = VECTOR_INST
      53             : ///   %3 <sgpr> = COPY %2 <vgpr>
      54             : /// BB2:
      55             : ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1>
      56             : ///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
      57             : ///
      58             : /// Now this code contains an illegal copy from a VGPR to an SGPR.
      59             : ///
      60             : /// In order to avoid this problem, this pass searches for PHI instructions
      61             : /// which define a <vsrc> register and constrains its definition class to
      62             : /// <vgpr> if the user of the PHI's definition register is a vector instruction.
      63             : /// If the PHI's definition class is constrained to <vgpr> then the coalescer
/// will be unable to perform the COPY removal from the above example which
      65             : /// ultimately led to the creation of an illegal COPY.
      66             : //===----------------------------------------------------------------------===//
      67             : 
      68             : #include "AMDGPU.h"
      69             : #include "AMDGPUSubtarget.h"
      70             : #include "SIInstrInfo.h"
      71             : #include "SIRegisterInfo.h"
      72             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      73             : #include "llvm/ADT/DenseSet.h"
      74             : #include "llvm/ADT/STLExtras.h"
      75             : #include "llvm/ADT/SmallSet.h"
      76             : #include "llvm/ADT/SmallVector.h"
      77             : #include "llvm/CodeGen/MachineBasicBlock.h"
      78             : #include "llvm/CodeGen/MachineDominators.h"
      79             : #include "llvm/CodeGen/MachineFunction.h"
      80             : #include "llvm/CodeGen/MachineFunctionPass.h"
      81             : #include "llvm/CodeGen/MachineInstr.h"
      82             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      83             : #include "llvm/CodeGen/MachineOperand.h"
      84             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      85             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      86             : #include "llvm/Pass.h"
      87             : #include "llvm/Support/CodeGen.h"
      88             : #include "llvm/Support/CommandLine.h"
      89             : #include "llvm/Support/Debug.h"
      90             : #include "llvm/Support/raw_ostream.h"
      91             : #include "llvm/Target/TargetMachine.h"
      92             : #include <cassert>
      93             : #include <cstdint>
      94             : #include <iterator>
      95             : #include <list>
      96             : #include <map>
      97             : #include <tuple>
      98             : #include <utility>
      99             : 
     100             : using namespace llvm;
     101             : 
     102             : #define DEBUG_TYPE "si-fix-sgpr-copies"
     103             : 
// Command-line switch (off by default) enabling the hoist-and-merge of
// identical M0 initializations performed by hoistAndMergeSGPRInits().
static cl::opt<bool> EnableM0Merge(
  "amdgpu-enable-merge-m0",
  cl::desc("Merge and hoist M0 initializations"),
  cl::init(false));
     108             : 
     109             : namespace {
     110             : 
/// Machine function pass that rewrites or constrains illegal VGPR -> SGPR
/// copies (see the file header comment for the motivating example).
class SIFixSGPRCopies : public MachineFunctionPass {
  // Dominator tree used for the M0 init hoisting/merging; set up in
  // runOnMachineFunction from the required analysis.
  MachineDominatorTree *MDT;

public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fix SGPR copies"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Requires and preserves the dominator tree; never alters the CFG.
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
     130             : 
     131             : } // end anonymous namespace
     132             : 
// Register the pass with the PassRegistry, declaring its dependency on the
// machine dominator tree analysis.
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
                     "SI Fix SGPR copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
                     "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

// Opaque ID other code uses to refer to this pass.
char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

// Factory used by the AMDGPU target pass pipeline setup.
FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}
     146             : 
     147       44116 : static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
     148       44116 :   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
     149      303634 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     150      531270 :     if (!MI.getOperand(i).isReg() ||
     151      157934 :         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
     152             :       continue;
     153             : 
     154      157934 :     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
     155             :       return true;
     156             :   }
     157             :   return false;
     158             : }
     159             : 
     160             : static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
     161      272160 : getCopyRegClasses(const MachineInstr &Copy,
     162             :                   const SIRegisterInfo &TRI,
     163             :                   const MachineRegisterInfo &MRI) {
     164      272160 :   unsigned DstReg = Copy.getOperand(0).getReg();
     165      272160 :   unsigned SrcReg = Copy.getOperand(1).getReg();
     166             : 
     167             :   const TargetRegisterClass *SrcRC =
     168      272160 :     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
     169             :     MRI.getRegClass(SrcReg) :
     170       40471 :     TRI.getPhysRegClass(SrcReg);
     171             : 
     172             :   // We don't really care about the subregister here.
     173             :   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
     174             : 
     175             :   const TargetRegisterClass *DstRC =
     176      272160 :     TargetRegisterInfo::isVirtualRegister(DstReg) ?
     177             :     MRI.getRegClass(DstReg) :
     178           0 :     TRI.getPhysRegClass(DstReg);
     179             : 
     180      272160 :   return std::make_pair(SrcRC, DstRC);
     181             : }
     182             : 
     183      266172 : static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
     184             :                              const TargetRegisterClass *DstRC,
     185             :                              const SIRegisterInfo &TRI) {
     186      266172 :   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
     187             : }
     188             : 
     189      240137 : static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
     190             :                              const TargetRegisterClass *DstRC,
     191             :                              const SIRegisterInfo &TRI) {
     192      240137 :   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
     193             : }
     194             : 
// Given an SGPR -> VGPR copy MI, try to make the copy unnecessary by
// retyping its VGPR destination as an SGPR. This is only done when every
// other user of the destination is in the same block and can legally take
// the SGPR source operand directly. Returns true if the class was changed
// (MI itself is left in place; it becomes an SGPR -> SGPR copy).
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
                                      const SIRegisterInfo *TRI,
                                      const SIInstrInfo *TII) {
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  auto &Src = MI.getOperand(1);
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = Src.getReg();
  // Only virtual registers can be given a new register class.
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
      !TargetRegisterInfo::isVirtualRegister(DstReg))
    return false;

  for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
    const auto *UseMI = MO.getParent();
    if (UseMI == &MI)
      continue;
    // Reject if this user redefines DstReg, lives in a different block,
    // is still a target-independent (generic) opcode, or cannot legally
    // encode the SGPR source in this operand slot.
    if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
        UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
        !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
      return false;
  }
  // Change VGPR to SGPR destination.
  MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
  return true;
}
     219             : 
     220             : // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
     221             : //
     222             : // SGPRx = ...
     223             : // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
     224             : // VGPRz = COPY SGPRy
     225             : //
     226             : // ==>
     227             : //
     228             : // VGPRx = COPY SGPRx
     229             : // VGPRz = REG_SEQUENCE VGPRx, sub0
     230             : //
     231             : // This exposes immediate folding opportunities when materializing 64-bit
     232             : // immediates.
     233       59553 : static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
     234             :                                         const SIRegisterInfo *TRI,
     235             :                                         const SIInstrInfo *TII,
     236             :                                         MachineRegisterInfo &MRI) {
     237             :   assert(MI.isRegSequence());
     238             : 
     239       59553 :   unsigned DstReg = MI.getOperand(0).getReg();
     240       59553 :   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
     241             :     return false;
     242             : 
     243       37999 :   if (!MRI.hasOneUse(DstReg))
     244             :     return false;
     245             : 
     246       26471 :   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
     247       26471 :   if (!CopyUse.isCopy())
     248             :     return false;
     249             : 
     250             :   // It is illegal to have vreg inputs to a physreg defining reg_sequence.
     251       12000 :   if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
     252             :     return false;
     253             : 
     254             :   const TargetRegisterClass *SrcRC, *DstRC;
     255        5988 :   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
     256             : 
     257        5988 :   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
     258             :     return false;
     259             : 
     260        5333 :   if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
     261             :     return true;
     262             : 
     263             :   // TODO: Could have multiple extracts?
     264        5282 :   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
     265        5282 :   if (SubReg != AMDGPU::NoSubRegister)
     266             :     return false;
     267             : 
     268        5282 :   MRI.setRegClass(DstReg, DstRC);
     269             : 
     270             :   // SGPRx = ...
     271             :   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
     272             :   // VGPRz = COPY SGPRy
     273             : 
     274             :   // =>
     275             :   // VGPRx = COPY SGPRx
     276             :   // VGPRz = REG_SEQUENCE VGPRx, sub0
     277             : 
     278        5282 :   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
     279             : 
     280       19538 :   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
     281       28512 :     unsigned SrcReg = MI.getOperand(I).getReg();
     282             :     unsigned SrcSubReg = MI.getOperand(I).getSubReg();
     283             : 
     284             :     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
     285             :     assert(TRI->isSGPRClass(SrcRC) &&
     286             :            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
     287             : 
     288       14256 :     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
     289       14256 :     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
     290             : 
     291       14256 :     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
     292             : 
     293       14256 :     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
     294       14256 :             TmpReg)
     295       14256 :         .add(MI.getOperand(I));
     296             : 
     297       28512 :     MI.getOperand(I).setReg(TmpReg);
     298             :   }
     299             : 
     300        5282 :   CopyUse.eraseFromParent();
     301        5282 :   return true;
     302             : }
     303             : 
     304           0 : static bool phiHasVGPROperands(const MachineInstr &PHI,
     305             :                                const MachineRegisterInfo &MRI,
     306             :                                const SIRegisterInfo *TRI,
     307             :                                const SIInstrInfo *TII) {
     308           0 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     309           0 :     unsigned Reg = PHI.getOperand(i).getReg();
     310           0 :     if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
     311           0 :       return true;
     312             :   }
     313             :   return false;
     314             : }
     315             : 
     316         349 : static bool phiHasBreakDef(const MachineInstr &PHI,
     317             :                            const MachineRegisterInfo &MRI,
     318             :                            SmallSet<unsigned, 8> &Visited) {
     319         819 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     320         644 :     unsigned Reg = PHI.getOperand(i).getReg();
     321         644 :     if (Visited.count(Reg))
     322          30 :       continue;
     323             : 
     324         614 :     Visited.insert(Reg);
     325             : 
     326         614 :     MachineInstr *DefInstr = MRI.getVRegDef(Reg);
     327        1228 :     switch (DefInstr->getOpcode()) {
     328             :     default:
     329             :       break;
     330             :     case AMDGPU::SI_BREAK:
     331             :     case AMDGPU::SI_IF_BREAK:
     332             :     case AMDGPU::SI_ELSE_BREAK:
     333         174 :       return true;
     334          87 :     case AMDGPU::PHI:
     335          87 :       if (phiHasBreakDef(*DefInstr, MRI, Visited))
     336             :         return true;
     337             :     }
     338             :   }
     339             :   return false;
     340             : }
     341             : 
     342         787 : static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
     343             :                                           const TargetRegisterInfo &TRI) {
     344             :   for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
     345        1431 :        E = MBB.end(); I != E; ++I) {
     346         895 :     if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
     347             :       return true;
     348             :   }
     349             :   return false;
     350             : }
     351             : 
     352       31647 : static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
     353             :                                     const MachineInstr *MoveImm,
     354             :                                     const SIInstrInfo *TII,
     355             :                                     unsigned &SMovOp,
     356             :                                     int64_t &Imm) {
     357       63294 :   if (Copy->getOpcode() != AMDGPU::COPY)
     358             :     return false;
     359             : 
     360       31397 :   if (!MoveImm->isMoveImmediate())
     361             :     return false;
     362             : 
     363             :   const MachineOperand *ImmOp =
     364             :       TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
     365         292 :   if (!ImmOp->isImm())
     366             :     return false;
     367             : 
     368             :   // FIXME: Handle copies with sub-regs.
     369          54 :   if (Copy->getOperand(0).getSubReg())
     370             :     return false;
     371             : 
     372          54 :   switch (MoveImm->getOpcode()) {
     373             :   default:
     374             :     return false;
     375          27 :   case AMDGPU::V_MOV_B32_e32:
     376          27 :     SMovOp = AMDGPU::S_MOV_B32;
     377          27 :     break;
     378           0 :   case AMDGPU::V_MOV_B64_PSEUDO:
     379           0 :     SMovOp = AMDGPU::S_MOV_B64;
     380           0 :     break;
     381             :   }
     382          27 :   Imm = ImmOp->getImm();
     383          27 :   return true;
     384             : }
     385             : 
     386             : template <class UnaryPredicate>
     387         921 : bool searchPredecessors(const MachineBasicBlock *MBB,
     388             :                         const MachineBasicBlock *CutOff,
     389             :                         UnaryPredicate Predicate) {
     390         921 :   if (MBB == CutOff)
     391             :     return false;
     392             : 
     393             :   DenseSet<const MachineBasicBlock *> Visited;
     394         793 :   SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
     395             :                                                MBB->pred_end());
     396             : 
     397        1621 :   while (!Worklist.empty()) {
     398             :     MachineBasicBlock *MBB = Worklist.pop_back_val();
     399             : 
     400        1079 :     if (!Visited.insert(MBB).second)
     401             :       continue;
     402         853 :     if (MBB == CutOff)
     403             :       continue;
     404         797 :     if (Predicate(MBB))
     405             :       return true;
     406             : 
     407         546 :     Worklist.append(MBB->pred_begin(), MBB->pred_end());
     408             :   }
     409             : 
     410             :   return false;
     411             : }
     412         184 : 
     413             : static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
     414             :                                         const TargetRegisterInfo *TRI) {
     415         184 :   return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
     416             :            return hasTerminatorThatModifiesExec(*MBB, *TRI); });
     417             : }
     418             : 
     419          56 : // Checks if there is potential path From instruction To instruction.
     420             : // If CutOff is specified and it sits in between of that path we ignore
     421             : // a higher portion of the path and report it is not reachable.
// Returns true if there may be an execution path from instruction From to
// instruction To. A path through CutOff's block is deliberately ignored (the
// higher portion is reported unreachable) — see the comment above.
static bool isReachable(const MachineInstr *From,
                        const MachineInstr *To,
                        const MachineBasicBlock *CutOff,
                        MachineDominatorTree &MDT) {
  // If either From block dominates To block or instructions are in the same
  // block and From is higher.
  if (MDT.dominates(From, To))
    return true;

  const MachineBasicBlock *MBBFrom = From->getParent();
  const MachineBasicBlock *MBBTo = To->getParent();
  // Same block but From does not dominate: From is below To, no path.
  if (MBBFrom == MBBTo)
    return false;

  // Instructions are in different blocks, do predecessor search.
  // We should almost never get here since we do not usually produce M0 stores
  // other than -1.
  return searchPredecessors(MBBTo, CutOff, [MBBFrom]
           (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
}
     442             : 
     443             : // Hoist and merge identical SGPR initializations into a common predecessor.
     444         737 : // This is intended to combine M0 initializations, but can work with any
     445             : // SGPR. A VGPR cannot be processed since we cannot guarantee vector
// execution.
// Merge identical immediate initializations of Reg: when two inits of the
// same value are related by dominance, the dominated one is erased; when
// neither dominates, one init is hoisted to the nearest common dominator.
// Any other def of Reg (or an init with a different value) is treated as a
// clobber that blocks the merge. Returns true if anything changed.
static bool hoistAndMergeSGPRInits(unsigned Reg,
                                   const MachineRegisterInfo &MRI,
                                   MachineDominatorTree &MDT) {
  // List of inits by immediate value.
  using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
  InitListMap Inits;
  // List of clobbering instructions.
  SmallVector<MachineInstr*, 8> Clobbers;
  bool Changed = false;

  // Classify every def of Reg: a def of Reg from exactly one immediate (and
  // no other registers) is an "init"; anything else is a clobber.
  for (auto &MI : MRI.def_instructions(Reg)) {
    MachineOperand *Imm = nullptr;
    for (auto &MO: MI.operands()) {
      if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
          (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
        Imm = nullptr;
        break;
      } else if (MO.isImm())
        Imm = &MO;
    }
    if (Imm)
      Inits[Imm->getImm()].push_front(&MI);
    else
      Clobbers.push_back(&MI);
  }

  for (auto &Init : Inits) {
    auto &Defs = Init.second;

    // Pairwise comparison of all inits with this immediate value.
    for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
      MachineInstr *MI1 = *I1;

      for (auto I2 = std::next(I1); I2 != E; ) {
        MachineInstr *MI2 = *I2;

        // Check any possible interference
        auto intereferes = [&](MachineBasicBlock::iterator From,
                               MachineBasicBlock::iterator To) -> bool {

          assert(MDT.dominates(&*To, &*From));

          // A clobber interferes if it may execute between To and From.
          auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
            const MachineBasicBlock *MBBFrom = From->getParent();
            const MachineBasicBlock *MBBTo = To->getParent();
            bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
            bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
            if (!MayClobberFrom && !MayClobberTo)
              return false;
            if ((MayClobberFrom && !MayClobberTo) ||
                (!MayClobberFrom && MayClobberTo))
              return true;
            // Both can clobber, this is not an interference only if both are
            // dominated by Clobber and belong to the same block or if Clobber
            // properly dominates To, given that To >> From, so it dominates
            // both and located in a common dominator.
            return !((MBBFrom == MBBTo &&
                      MDT.dominates(Clobber, &*From) &&
                      MDT.dominates(Clobber, &*To)) ||
                     MDT.properlyDominates(Clobber->getParent(), MBBTo));
          };

          // Inits with a *different* immediate value act as clobbers too.
          return (llvm::any_of(Clobbers, interferes)) ||
                 (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
                    return C.first != Init.first &&
                           llvm::any_of(C.second, interferes);
                  }));
        };

        if (MDT.dominates(MI1, MI2)) {
          // MI1 dominates MI2: the second init is redundant.
          if (!intereferes(MI2, MI1)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI2->getParent()) << " " << *MI2);
            MI2->eraseFromParent();
            Defs.erase(I2++);
            Changed = true;
            continue;
          }
        } else if (MDT.dominates(MI2, MI1)) {
          // MI2 dominates MI1: the first init is redundant.
          // NOTE(review): after Defs.erase(I1++) + break, the ++I1 at the
          // bottom of the outer loop advances again, so the element right
          // after the erased init is never tried as MI1 — looks like a
          // missed optimization rather than a correctness bug; confirm.
          if (!intereferes(MI1, MI2)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI1->getParent()) << " " << *MI1);
            MI1->eraseFromParent();
            Defs.erase(I1++);
            Changed = true;
            break;
          }
        } else {
          // Neither dominates: hoist MI2 to the nearest common dominator and
          // drop MI1, provided no clobber interferes with either move.
          auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
                                                     MI2->getParent());
          if (!MBB) {
            ++I2;
            continue;
          }

          MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
          if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI1->getParent()) << " " << *MI1
                       << "and moving from "
                       << printMBBReference(*MI2->getParent()) << " to "
                       << printMBBReference(*I->getParent()) << " " << *MI2);
            I->getParent()->splice(I, MI2->getParent(), MI2);
            MI1->eraseFromParent();
            Defs.erase(I1++);
            Changed = true;
            break;
          }
        }
        ++I2;
      }
      ++I1;
    }
  }

  // Erasing/moving defs may have invalidated kill flags on Reg's live range.
  if (Changed)
    MRI.clearKillFlags(Reg);

  return Changed;
}
     569             : 
                     : // Pass entry point: walk every instruction of every block in MF and legalize
                     : // the cases where an SGPR would be written from a VGPR value (COPY/WQM/WWM,
                     : // PHI, REG_SEQUENCE, INSERT_SUBREG). The usual remedy is to rewrite the
                     : // offending instruction (and, via the helper, its users) to the VALU with
                     : // TII->moveToVALU(). Always reports the function as modified.
     570           3 : bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
     571             :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     572             :   MachineRegisterInfo &MRI = MF.getRegInfo();
     573           3 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     574             :   const SIInstrInfo *TII = ST.getInstrInfo();
     575           4 :   MDT = &getAnalysis<MachineDominatorTree>();
     576           0 : 
                     :   // NOTE(review): Worklist is never used anywhere in this function —
                     :   // candidate for removal in a follow-up change.
     577             :   SmallVector<MachineInstr *, 16> Worklist;
     578             : 
                     :   // Explicit iterators (rather than range-for) are used here; moveToVALU()
                     :   // and the fold helpers below can rewrite instructions while we iterate.
                     :   // NOTE(review): this relies on the current instruction at I never being
                     :   // erased out from under the loop — confirm against the helpers.
     579             :   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
     580           0 :                                                   BI != BE; ++BI) {
     581             :     MachineBasicBlock &MBB = *BI;
     582             :     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
     583           1 :          I != E; ++I) {
     584             :       MachineInstr &MI = *I;
     585             : 
     586           4 :       switch (MI.getOpcode()) {
     587             :       default:
     588           4 :         continue;
     589             :       case AMDGPU::COPY:
     590           0 :       case AMDGPU::WQM:
     591             :       case AMDGPU::WWM: {
     592             :         // If the destination register is a physical register there isn't really
     593           4 :         // much we can do to fix this.
     594           4 :         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
     595             :           continue;
     596             : 
     597             :         const TargetRegisterClass *SrcRC, *DstRC;
     598             :         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
     599             :         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
     600             :           unsigned SrcReg = MI.getOperand(1).getReg();
                     :           // A physical VGPR source has no virtual def to inspect for an
                     :           // immediate, so go straight to the VALU rewrite.
     601           2 :           if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
     602           1 :             TII->moveToVALU(MI, MDT);
     603             :             break;
     604             :           }
     605           1 : 
     606             :           MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
     607             :           unsigned SMovOp;
     608             :           int64_t Imm;
     609             :           // If we are just copying an immediate, we can replace the copy with
     610             :           // s_mov_b32.
     611             :           if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
                     :             // Rewrite this COPY in place into "SMovOp dst, Imm": swap the
                     :             // register source operand for the immediate, then change the
                     :             // opcode; implicit operands are re-derived from the new descriptor.
     612             :             MI.getOperand(1).ChangeToImmediate(Imm);
     613             :             MI.addImplicitDefUseOperands(MF);
     614           1 :             MI.setDesc(TII->get(SMovOp));
     615           1 :             break;
     616             :           }
                     :           // Not a foldable immediate: lower the illegal VGPR->SGPR copy
                     :           // (and its SGPR user chain) to the VALU.
     617           1 :           TII->moveToVALU(MI, MDT);
     618             :         } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
                     :           // SGPR->VGPR is legal; the helper presumably tries to constrain the
                     :           // destination back to SGPR so the copy becomes removable — see its
                     :           // definition earlier in this file.
     619             :           tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
     620       19722 :         }
     621       19722 : 
     622       19722 :         break;
     623       19722 :       }
     624       19722 :       case AMDGPU::PHI: {
                     :         // Only PHIs that define an SGPR need fixing.
     625       19722 :         unsigned Reg = MI.getOperand(0).getReg();
     626             :         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
     627             :           break;
     628             : 
     629             :         // We don't need to fix the PHI if the common dominator of the
     630       41954 :         // two incoming blocks terminates with a uniform branch.
     631             :         bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
                     :         // 5 explicit operands == exactly two incoming (value, block) pairs
                     :         // plus the def, i.e. a two-predecessor PHI.
     632             :         if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
     633      647636 :           MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
     634             :           MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
     635             : 
     636     1250808 :           if (!predsHasDivergentTerminator(MBB0, TRI) &&
     637             :               !predsHasDivergentTerminator(MBB1, TRI)) {
     638             :             LLVM_DEBUG(dbgs()
     639      277239 :                        << "Not fixing PHI for uniform branch: " << MI << '\n');
     640             :             break;
     641             :           }
     642             :         }
     643             : 
     644      554478 :         // If a PHI node defines an SGPR and any of its operands are VGPRs,
     645       11067 :         // then we need to move it to the VALU.
     646             :         //
     647             :         // Also, if a PHI node defines an SGPR and has all SGPR operands
     648      266172 :         // we must move it to the VALU, because the SGPR operands will
     649      266172 :         // all end up being assigned the same register, which means
     650       32023 :         // there is a potential for a conflict if different threads take
     651       32023 :         // different control flow paths.
     652         376 :         //
     653         403 :         // For Example:
     654             :         //
     655             :         // sgpr0 = def;
     656       31647 :         // ...
     657             :         // sgpr1 = def;
     658             :         // ...
     659             :         // sgpr2 = PHI sgpr0, sgpr1
     660             :         // use sgpr2;
     661       31647 :         //
     662          54 :         // Will Become:
     663          27 :         //
     664          27 :         // sgpr2 = def;
     665             :         // ...
     666             :         // sgpr2 = def;
     667       31620 :         // ...
     668      234149 :         // use sgpr2
     669       42819 :         //
     670             :         // The one exception to this rule is when one of the operands
     671             :         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
     672             :         // instruction.  In this case, there we know the program will
     673             :         // never enter the second block (the loop) without entering
     674        3271 :         // the first block (where the condition is computed), so there
     675        3271 :         // is no chance for values to be over-written.
     676        3271 : 
                     :         // Per the comment above: move the PHI to the VALU unless every
                     :         // operand is SGPR *and* reaches back to a SI_*BREAK def.
     677             :         SmallSet<unsigned, 8> Visited;
     678             :         if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
     679             :           LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
     680             :           TII->moveToVALU(MI, MDT);
     681         432 :         }
     682         432 :         break;
     683         417 :       }
     684         417 :       case AMDGPU::REG_SEQUENCE:
                     :         // A REG_SEQUENCE whose result is VGPR, or one with no VGPR inputs,
                     :         // is already legal; instead try to fold any feeding VGPR copy away.
     685             :         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
     686         737 :             !hasVGPROperands(MI, TRI)) {
     687             :           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
     688             :           continue;
     689             :         }
     690             : 
     691             :         LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
     692             : 
                     :         // SGPR result fed by VGPR operands: rewrite to the VALU.
     693             :         TII->moveToVALU(MI, MDT);
     694             :         break;
     695             :       case AMDGPU::INSERT_SUBREG: {
                     :         // INSERT_SUBREG writing an SGPR from any VGPR source is illegal
                     :         // for the same reason as a VGPR->SGPR copy; move it to the VALU.
     696             :         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
     697             :         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
     698             :         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
     699             :         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
     700             :         if (TRI->isSGPRClass(DstRC) &&
     701             :             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
     702             :           LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
     703             :           TII->moveToVALU(MI, MDT);
     704             :         }
     705             :         break;
     706             :       }
     707             :       }
     708             :     }
     709             :   }
     710             : 
                     :   // When optimizing (and the EnableM0Merge flag is set), hoist/merge
                     :   // initializations of M0 — helper defined earlier in this file.
     711             :   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
     712             :     hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
     713             : 
                     :   // Conservatively report that the function was modified.
     714             :   return true;
     715             : }

Generated by: LCOV version 1.13