LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFixSGPRCopies.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 233 252 92.5 %
Date: 2018-02-23 15:42:53 Functions: 27 28 96.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Copies from VGPR to SGPR registers are illegal and the register coalescer
      12             : /// will sometimes generate these illegal copies in situations like this:
      13             : ///
      14             : ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
      15             : ///
      16             : /// BB0:
      17             : ///   %0 <sgpr> = SCALAR_INST
      18             : ///   %1 <vsrc> = COPY %0 <sgpr>
      19             : ///    ...
      20             : ///    BRANCH %cond BB1, BB2
      21             : ///  BB1:
      22             : ///    %2 <vgpr> = VECTOR_INST
      23             : ///    %3 <vsrc> = COPY %2 <vgpr>
      24             : ///  BB2:
///    %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1>
      26             : ///    %5 <vgpr> = VECTOR_INST %4 <vsrc>
      27             : ///
      28             : ///
      29             : /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
      30             : /// code will look like this:
      31             : ///
      32             : /// BB0:
      33             : ///   %0 <sgpr> = SCALAR_INST
      34             : ///    ...
      35             : ///    BRANCH %cond BB1, BB2
      36             : /// BB1:
      37             : ///   %2 <vgpr> = VECTOR_INST
      38             : ///   %3 <vsrc> = COPY %2 <vgpr>
      39             : /// BB2:
      40             : ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1>
      41             : ///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
      42             : ///
      43             : /// Now that the result of the PHI instruction is an SGPR, the register
      44             : /// allocator is now forced to constrain the register class of %3 to
      45             : /// <sgpr> so we end up with final code like this:
      46             : ///
      47             : /// BB0:
      48             : ///   %0 <sgpr> = SCALAR_INST
      49             : ///    ...
      50             : ///    BRANCH %cond BB1, BB2
      51             : /// BB1:
      52             : ///   %2 <vgpr> = VECTOR_INST
      53             : ///   %3 <sgpr> = COPY %2 <vgpr>
      54             : /// BB2:
      55             : ///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1>
      56             : ///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
      57             : ///
      58             : /// Now this code contains an illegal copy from a VGPR to an SGPR.
      59             : ///
      60             : /// In order to avoid this problem, this pass searches for PHI instructions
      61             : /// which define a <vsrc> register and constrains its definition class to
      62             : /// <vgpr> if the user of the PHI's definition register is a vector instruction.
      63             : /// If the PHI's definition class is constrained to <vgpr> then the coalescer
/// will be unable to perform the COPY removal from the above example which
      65             : /// ultimately led to the creation of an illegal COPY.
      66             : //===----------------------------------------------------------------------===//
      67             : 
      68             : #include "AMDGPU.h"
      69             : #include "AMDGPUSubtarget.h"
      70             : #include "SIInstrInfo.h"
      71             : #include "SIRegisterInfo.h"
      72             : #include "llvm/ADT/DenseSet.h"
      73             : #include "llvm/ADT/STLExtras.h"
      74             : #include "llvm/ADT/SmallSet.h"
      75             : #include "llvm/ADT/SmallVector.h"
      76             : #include "llvm/CodeGen/MachineBasicBlock.h"
      77             : #include "llvm/CodeGen/MachineDominators.h"
      78             : #include "llvm/CodeGen/MachineFunction.h"
      79             : #include "llvm/CodeGen/MachineFunctionPass.h"
      80             : #include "llvm/CodeGen/MachineInstr.h"
      81             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      82             : #include "llvm/CodeGen/MachineOperand.h"
      83             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      84             : #include "llvm/CodeGen/MachinePostDominators.h"
      85             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      86             : #include "llvm/Pass.h"
      87             : #include "llvm/Support/CodeGen.h"
      88             : #include "llvm/Support/CommandLine.h"
      89             : #include "llvm/Support/Debug.h"
      90             : #include "llvm/Support/raw_ostream.h"
      91             : #include "llvm/Target/TargetMachine.h"
      92             : #include <cassert>
      93             : #include <cstdint>
      94             : #include <iterator>
      95             : #include <list>
      96             : #include <map>
      97             : #include <tuple>
      98             : #include <utility>
      99             : 
     100             : using namespace llvm;
     101             : 
     102             : #define DEBUG_TYPE "si-fix-sgpr-copies"
     103             : 
// Command-line switch gating the hoistAndMergeSGPRInits() transformation on
// M0. Off by default; see "Merge and hoist M0 initializations" below.
static cl::opt<bool> EnableM0Merge(
  "amdgpu-enable-merge-m0",
  cl::desc("Merge and hoist M0 initializations"),
  cl::init(false));
     108             : 
     109             : namespace {
     110             : 
class SIFixSGPRCopies : public MachineFunctionPass {
  // Dominator/post-dominator analyses used when deciding where illegal
  // VGPR->SGPR copies must be rewritten and where SGPR inits can be hoisted.
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *MPDT;
  // Post-dominance frontier: maps a block to the set of branch blocks whose
  // divergence it "closes". Filled by computePDF(), printed by printPDF().
  DenseMap<MachineBasicBlock *, SetVector<MachineBasicBlock*>> PDF;
  // Compute the post-dominance frontier for every block of MF into PDF.
  void computePDF(MachineFunction * MF);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // Debug dump of the PDF map.
  void printPDF();
#endif
public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fix SGPR copies"; }

  // Requires and preserves both dominator trees; the pass rewrites
  // instructions in place and never changes the CFG.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addPreserved<MachinePostDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
     137             : 
     138             : } // end anonymous namespace
     139             : 
     140       59438 : INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
     141             :                      "SI Fix SGPR copies", false, false)
     142       59438 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     143      279728 : INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
     144             :                      "SI Fix SGPR copies", false, false)
     145             : 
// Unique pass identity; the address of ID is what the pass manager keys on.
char SIFixSGPRCopies::ID = 0;

// Exported handle so other code can refer to this pass by ID.
char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

// Factory used by the AMDGPU target pass pipeline setup.
FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}
     153             : 
     154       23553 : static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
     155       23553 :   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
     156      189942 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     157      433716 :     if (!MI.getOperand(i).isReg() ||
     158       96960 :         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
     159       71418 :       continue;
     160             : 
     161       96960 :     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
     162             :       return true;
     163             :   }
     164             :   return false;
     165             : }
     166             : 
     167             : static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
     168      198222 : getCopyRegClasses(const MachineInstr &Copy,
     169             :                   const SIRegisterInfo &TRI,
     170             :                   const MachineRegisterInfo &MRI) {
     171      198222 :   unsigned DstReg = Copy.getOperand(0).getReg();
     172      198222 :   unsigned SrcReg = Copy.getOperand(1).getReg();
     173             : 
     174             :   const TargetRegisterClass *SrcRC =
     175      223157 :     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
     176             :     MRI.getRegClass(SrcReg) :
     177             :     TRI.getPhysRegClass(SrcReg);
     178             : 
     179             :   // We don't really care about the subregister here.
     180             :   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
     181             : 
     182             :   const TargetRegisterClass *DstRC =
     183      198222 :     TargetRegisterInfo::isVirtualRegister(DstReg) ?
     184             :     MRI.getRegClass(DstReg) :
     185             :     TRI.getPhysRegClass(DstReg);
     186             : 
     187      198222 :   return std::make_pair(SrcRC, DstRC);
     188             : }
     189             : 
     190      195382 : static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
     191             :                              const TargetRegisterClass *DstRC,
     192             :                              const SIRegisterInfo &TRI) {
     193      195382 :   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
     194             : }
     195             : 
     196      166373 : static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
     197             :                              const TargetRegisterClass *DstRC,
     198             :                              const SIRegisterInfo &TRI) {
     199      166373 :   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
     200             : }
     201             : 
// Try to eliminate an SGPR->VGPR copy (MI) by retyping its VGPR destination
// as SGPR instead. Legal only when every user of the destination can accept
// an SGPR operand in that slot. Returns true and mutates the dest register
// class on success; false leaves MI untouched.
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
                                      const SIRegisterInfo *TRI,
                                      const SIInstrInfo *TII) {
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  auto &Src = MI.getOperand(1);
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = Src.getReg();
  // Only virtual registers can have their class rewritten.
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
      !TargetRegisterInfo::isVirtualRegister(DstReg))
    return false;

  // Every non-debug use of DstReg must: not redefine it, live in MI's own
  // block, be a target (post-ISel) instruction, and accept Src as a legal
  // operand in the slot currently holding DstReg.
  for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
    const auto *UseMI = MO.getParent();
    if (UseMI == &MI)
      continue;
    if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
        UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
        !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
      return false;
  }
  // Change VGPR to SGPR destination.
  MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
  return true;
}
     226             : 
// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
// SGPRx = ...
// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
// VGPRz = COPY SGPRy
//
// ==>
//
// VGPRx = COPY SGPRx
// VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
//
// Returns true if MI (the REG_SEQUENCE) was rewritten to produce VGPRs and
// the trailing copy was erased, or if the copy itself could instead be
// retyped to SGPR; false if the pattern does not apply.
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  // Only interesting when the REG_SEQUENCE currently produces an SGPR...
  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  // ...whose sole use is a COPY (so rewriting it cannot affect other users).
  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  // It is illegal to have vreg inputs to a physreg defining reg_sequence.
  if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // Cheaper alternative: keep the REG_SEQUENCE scalar and retype the copy's
  // destination to SGPR instead, if all its users allow that.
  if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
    return true;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  // Retype the REG_SEQUENCE result to the copy's VGPR class.
  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy

  // =>
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  // Operands come in (reg, subreg-index) pairs starting at index 1; insert a
  // SGPR->VGPR copy in front of each register input and use its result.
  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
        .add(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  // The original SGPR->VGPR copy is now dead.
  CopyUse.eraseFromParent();
  return true;
}
     310             : 
     311         371 : static bool phiHasVGPROperands(const MachineInstr &PHI,
     312             :                                const MachineRegisterInfo &MRI,
     313             :                                const SIRegisterInfo *TRI,
     314             :                                const SIInstrInfo *TII) {
     315        1889 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     316        1526 :     unsigned Reg = PHI.getOperand(i).getReg();
     317         763 :     if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
     318             :       return true;
     319             :   }
     320             :   return false;
     321             : }
     322             : 
     323         260 : static bool phiHasBreakDef(const MachineInstr &PHI,
     324             :                            const MachineRegisterInfo &MRI,
     325             :                            SmallSet<unsigned, 8> &Visited) {
     326         960 :   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     327         962 :     unsigned Reg = PHI.getOperand(i).getReg();
     328         481 :     if (Visited.count(Reg))
     329          28 :       continue;
     330             : 
     331         453 :     Visited.insert(Reg);
     332             : 
     333         453 :     MachineInstr *DefInstr = MRI.getVRegDef(Reg);
     334         906 :     switch (DefInstr->getOpcode()) {
     335             :     default:
     336             :       break;
     337             :     case AMDGPU::SI_BREAK:
     338             :     case AMDGPU::SI_IF_BREAK:
     339             :     case AMDGPU::SI_ELSE_BREAK:
     340         131 :       return true;
     341          70 :     case AMDGPU::PHI:
     342          70 :       if (phiHasBreakDef(*DefInstr, MRI, Visited))
     343             :         return true;
     344             :     }
     345             :   }
     346             :   return false;
     347             : }
     348             : 
     349         472 : static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
     350             :                                           const TargetRegisterInfo &TRI) {
     351         472 :   for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
     352         914 :        E = MBB.end(); I != E; ++I) {
     353         693 :     if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
     354         251 :       return true;
     355             :   }
     356         221 :   return false;
     357             : }
     358             : 
// Decide whether the COPY 'Copy', whose source is defined by the immediate
// move 'MoveImm', can be folded into a single scalar immediate move.
// On success returns true and sets the out-params: SMovOp to the S_MOV
// opcode to use and Imm to the immediate value. Out-params are untouched on
// failure.
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                    const MachineInstr *MoveImm,
                                    const SIInstrInfo *TII,
                                    unsigned &SMovOp,
                                    int64_t &Imm) {
  // Only plain COPYs are handled.
  if (Copy->getOpcode() != AMDGPU::COPY)
    return false;

  if (!MoveImm->isMoveImmediate())
    return false;

  const MachineOperand *ImmOp =
      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
  // src0 could be a register or FP immediate; only integer imms fold.
  if (!ImmOp->isImm())
    return false;

  // FIXME: Handle copies with sub-regs.
  if (Copy->getOperand(0).getSubReg())
    return false;

  // Map the vector move opcode to its scalar counterpart.
  switch (MoveImm->getOpcode()) {
  default:
    return false;
  case AMDGPU::V_MOV_B32_e32:
    SMovOp = AMDGPU::S_MOV_B32;
    break;
  case AMDGPU::V_MOV_B64_PSEUDO:
    SMovOp = AMDGPU::S_MOV_B64;
    break;
  }
  Imm = ImmOp->getImm();
  return true;
}
     392             : 
     393             : template <class UnaryPredicate>
     394         184 : bool searchPredecessors(const MachineBasicBlock *MBB,
     395             :                         const MachineBasicBlock *CutOff,
     396             :                         UnaryPredicate Predicate) {
     397         184 :   if (MBB == CutOff)
     398             :     return false;
     399             : 
     400             :   DenseSet<const MachineBasicBlock *> Visited;
     401             :   SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
     402             :                                                MBB->pred_end());
     403             : 
     404         132 :   while (!Worklist.empty()) {
     405             :     MachineBasicBlock *MBB = Worklist.pop_back_val();
     406             : 
     407         152 :     if (!Visited.insert(MBB).second)
     408          10 :       continue;
     409          66 :     if (MBB == CutOff)
     410          56 :       continue;
     411          10 :     if (Predicate(MBB))
     412             :       return true;
     413             : 
     414          10 :     Worklist.append(MBB->pred_begin(), MBB->pred_end());
     415             :   }
     416             : 
     417             :   return false;
     418             : }
     419             : 
// Checks if there is potential path From instruction To instruction.
// If CutOff is specified and it sits in between of that path we ignore
// a higher portion of the path and report it is not reachable.
static bool isReachable(const MachineInstr *From,
                        const MachineInstr *To,
                        const MachineBasicBlock *CutOff,
                        MachineDominatorTree &MDT) {
  // If either From block dominates To block or instructions are in the same
  // block and From is higher.
  if (MDT.dominates(From, To))
    return true;

  const MachineBasicBlock *MBBFrom = From->getParent();
  const MachineBasicBlock *MBBTo = To->getParent();
  // Same block but From does not dominate To => From is below To, so there
  // is no forward path within the block.
  if (MBBFrom == MBBTo)
    return false;

  // Instructions are in different blocks, do predecessor search.
  // We should almost never get here since we do not usually produce M0 stores
  // other than -1.
  return searchPredecessors(MBBTo, CutOff, [MBBFrom]
           (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
}
     443             : 
     444             : // Hoist and merge identical SGPR initializations into a common predecessor.
     445             : // This is intended to combine M0 initializations, but can work with any
     446             : // SGPR. A VGPR cannot be processed since we cannot guarantee vector
// execution.
static bool hoistAndMergeSGPRInits(unsigned Reg,
                                   const MachineRegisterInfo &MRI,
                                   MachineDominatorTree &MDT) {
  // List of inits by immediate value.
  using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
  InitListMap Inits;
  // List of clobbering instructions.
  SmallVector<MachineInstr*, 8> Clobbers;
  bool Changed = false;

  // Partition all defs of Reg: pure "Reg = <imm>" moves are bucketed by
  // immediate value; anything else (extra reg operands, multiple imms, ...)
  // is treated as a clobber of Reg.
  for (auto &MI : MRI.def_instructions(Reg)) {
    MachineOperand *Imm = nullptr;
    for (auto &MO: MI.operands()) {
      if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
          (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
        Imm = nullptr;
        break;
      } else if (MO.isImm())
        Imm = &MO;
    }
    if (Imm)
      Inits[Imm->getImm()].push_front(&MI);
    else
      Clobbers.push_back(&MI);
  }

  // For every pair of inits with the same immediate, try to erase one of
  // them (or hoist to a common dominator) when no clobber or different-value
  // init can interfere on any path between the merged points.
  for (auto &Init : Inits) {
    auto &Defs = Init.second;

    for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
      MachineInstr *MI1 = *I1;

      for (auto I2 = std::next(I1); I2 != E; ) {
        MachineInstr *MI2 = *I2;

        // Check any possible interference
        auto intereferes = [&](MachineBasicBlock::iterator From,
                               MachineBasicBlock::iterator To) -> bool {

          assert(MDT.dominates(&*To, &*From));

          // True if Clobber may execute on some path between To and From.
          auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
            const MachineBasicBlock *MBBFrom = From->getParent();
            const MachineBasicBlock *MBBTo = To->getParent();
            bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
            bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
            if (!MayClobberFrom && !MayClobberTo)
              return false;
            if ((MayClobberFrom && !MayClobberTo) ||
                (!MayClobberFrom && MayClobberTo))
              return true;
            // Both can clobber, this is not an interference only if both are
            // dominated by Clobber and belong to the same block or if Clobber
            // properly dominates To, given that To >> From, so it dominates
            // both and located in a common dominator.
            return !((MBBFrom == MBBTo &&
                      MDT.dominates(Clobber, &*From) &&
                      MDT.dominates(Clobber, &*To)) ||
                     MDT.properlyDominates(Clobber->getParent(), MBBTo));
          };

          // Interference arises from explicit clobbers or from inits of Reg
          // with a different immediate value.
          return (llvm::any_of(Clobbers, interferes)) ||
                 (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
                    return C.first != Init.first &&
                           llvm::any_of(C.second, interferes);
                  }));
        };

        if (MDT.dominates(MI1, MI2)) {
          // MI1 already covers MI2; drop MI2 if nothing interferes between.
          if (!intereferes(MI2, MI1)) {
            DEBUG(dbgs() << "Erasing from "
                         << printMBBReference(*MI2->getParent()) << " "
                         << *MI2);
            MI2->eraseFromParent();
            Defs.erase(I2++);   // post-increment keeps I2 valid after erase
            Changed = true;
            continue;
          }
        } else if (MDT.dominates(MI2, MI1)) {
          // Symmetric case: keep MI2, drop MI1 and restart its inner scan.
          if (!intereferes(MI1, MI2)) {
            DEBUG(dbgs() << "Erasing from "
                         << printMBBReference(*MI1->getParent()) << " "
                         << *MI1);
            MI1->eraseFromParent();
            Defs.erase(I1++);
            Changed = true;
            break;
          }
        } else {
          // Neither dominates the other: hoist MI2 to the nearest common
          // dominator (after its PHIs) and drop MI1, if that spot is safe
          // with respect to both original locations.
          auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
                                                     MI2->getParent());
          if (!MBB) {
            ++I2;
            continue;
          }

          MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
          if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
            DEBUG(dbgs() << "Erasing from "
                         << printMBBReference(*MI1->getParent()) << " " << *MI1
                         << "and moving from "
                         << printMBBReference(*MI2->getParent()) << " to "
                         << printMBBReference(*I->getParent()) << " " << *MI2);
            I->getParent()->splice(I, MI2->getParent(), MI2);
            MI1->eraseFromParent();
            Defs.erase(I1++);
            Changed = true;
            break;
          }
        }
        ++I2;
      }
      ++I1;
    }
  }

  // Erased/moved defs invalidate any recorded kill flags on Reg.
  if (Changed)
    MRI.clearKillFlags(Reg);

  return Changed;
}
     569             : 
     570       16562 : void SIFixSGPRCopies::computePDF(MachineFunction *MF) {
     571             :   MachineFunction::iterator B = MF->begin();
     572             :   MachineFunction::iterator E = MF->end();
     573       35224 :   for (; B != E; ++B) {
     574       18662 :     if (B->succ_size() > 1) {
     575        2994 :       for (auto S : B->successors()) {
     576        1996 :         MachineDomTreeNode *runner = MPDT->getNode(&*S);
     577        3992 :         MachineDomTreeNode *sentinel = MPDT->getNode(&*B)->getIDom();
     578        4596 :         while (runner && runner != sentinel) {
     579        2600 :           PDF[runner->getBlock()].insert(&*B);
     580        1300 :           runner = runner->getIDom();
     581             :         }
     582             :       }
     583             :     }
     584             :   }
     585       16562 : }
     586             : 
     587             : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     588             : void SIFixSGPRCopies::printPDF() {
     589             :   dbgs() << "\n######## PostDominanceFrontiers set #########\n";
     590             :   for (auto &I : PDF) {
     591             :     dbgs() << "PDF[ " << I.first->getNumber() << "] : ";
     592             :     for (auto &J : I.second) {
     593             :       dbgs() << J->getNumber() << ' ';
     594             :     }
     595             :     dbgs() << '\n';
     596             :   }
     597             :   dbgs() << "\n##############################################\n";
     598             : }
     599             : #endif
     600             : 
/// Pass entry point: scan every instruction in \p MF and legalize
/// VGPR-to-SGPR data flow.  Only COPY/WQM/WWM, PHI, REG_SEQUENCE and
/// INSERT_SUBREG are inspected; offending instructions are rewritten to
/// VALU form via TII->moveToVALU().  Always reports the function modified.
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  MPDT = &getAnalysis<MachinePostDominatorTree>();
  // Rebuild the post-dominance frontier sets from scratch; they are used
  // below to decide whether a PHI's control dependencies are uniform.
  PDF.clear();
  computePDF(&MF);
  DEBUG(printPDF());

  // NOTE(review): Worklist is declared but never used in this function.
  SmallVector<MachineInstr *, 16> Worklist;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY:
      case AMDGPU::WQM:
      case AMDGPU::WWM: {
        // If the destination register is a physical register there isn't really
        // much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          unsigned SrcReg = MI.getOperand(1).getReg();
          // A physical VGPR source can't be analyzed further; the copy must
          // simply be moved to the VALU.
          if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
            TII->moveToVALU(MI);
            break;
          }

          MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
          unsigned SMovOp;
          int64_t Imm;
          // If we are just copying an immediate, we can replace the copy with
          // s_mov_b32.
          if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
            MI.getOperand(1).ChangeToImmediate(Imm);
            MI.addImplicitDefUseOperands(MF);
            MI.setDesc(TII->get(SMovOp));
            break;
          }
          TII->moveToVALU(MI);
        } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
          // The reverse direction is legal, but may be improvable: try to
          // narrow the destination back to an SGPR class.
          tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
        }

        break;
      }
      case AMDGPU::PHI: {
        unsigned Reg = MI.getOperand(0).getReg();
        // Only PHIs that define an SGPR need fixing.
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // We don't need to fix the PHI if all the source blocks
        // have no divergent control dependencies
        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
        if (!HasVGPROperand) {
          bool Uniform = true;
          MachineBasicBlock * Join = MI.getParent();
          for (auto &O : MI.explicit_operands()) {
            if (O.isMBB()) {
              MachineBasicBlock * Source = O.getMBB();
              SetVector<MachineBasicBlock*> &SourcePDF = PDF[Source];
              SetVector<MachineBasicBlock*> &JoinPDF   = PDF[Join];
              // NOTE(review): CDList is declared but never used.
              SetVector<MachineBasicBlock*> CDList;
              // A control dependence of Source that is not shared with Join
              // (or that Join dominates, i.e. a back edge) is divergent if
              // its terminator modifies EXEC.
              // NOTE(review): this inner `I` shadows the basic-block
              // iterator `I` of the enclosing loop.
              for (auto &I : SourcePDF) {
                if (!JoinPDF.count(I) || /* back edge */MDT->dominates(Join, I)) {
                  if (hasTerminatorThatModifiesExec(*I, *TRI))
                    Uniform = false;
                }
              }
            }
          }
          if (Uniform) {
            DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
            break;
          }
        }

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For Example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // Will Become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction.  In this case, there we know the program will
        // never enter the second block (the loop) without entering
        // the first block (where the condition is computed), so there
        // is no chance for values to be over-written.

        SmallSet<unsigned, 8> Visited;
        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
          DEBUG(dbgs() << "Fixing PHI: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      case AMDGPU::REG_SEQUENCE:
        // A REG_SEQUENCE producing a VGPR, or one with no VGPR operands,
        // is legal; try folding copies into it instead of rewriting it.
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      case AMDGPU::INSERT_SUBREG: {
        // An SGPR-defining INSERT_SUBREG with any VGPR source must be
        // rewritten on the VALU.
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  // M0 init hoisting/merging is gated on optimization level and the
  // EnableM0Merge flag.
  if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);

  return true;
}

Generated by: LCOV version 1.13