LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600OptimizeVectorRegisters.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 168 172 97.7 %
Date: 2017-09-14 15:23:50 Functions: 21 21 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- R600OptimizeVectorRegisters.cpp ------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass merges inputs of swizzable instructions into vectors sharing
      12             : /// common data and/or having enough undef subregs, using swizzle abilities.
      13             : ///
      14             : /// For instance let's consider the following pseudo code :
      15             : /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
      16             : /// ...
      17             : /// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
      18             : /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
      19             : ///
      20             : /// is turned into :
      21             : /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
      22             : /// ...
      23             : /// vreg7<def> = INSERT_SUBREG vreg4, sub3
      24             : /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
      25             : ///
      26             : /// This allows regalloc to reduce register pressure for vector registers and
      27             : /// to reduce MOV count.
      28             : //===----------------------------------------------------------------------===//
      29             : 
      30             : #include "AMDGPU.h"
      31             : #include "AMDGPUSubtarget.h"
      32             : #include "R600Defines.h"
      33             : #include "R600InstrInfo.h"
      34             : #include "llvm/ADT/DenseMap.h"
      35             : #include "llvm/ADT/STLExtras.h"
      36             : #include "llvm/ADT/StringRef.h"
      37             : #include "llvm/CodeGen/MachineBasicBlock.h"
      38             : #include "llvm/CodeGen/MachineDominators.h"
      39             : #include "llvm/CodeGen/MachineFunction.h"
      40             : #include "llvm/CodeGen/MachineFunctionPass.h"
      41             : #include "llvm/CodeGen/MachineInstr.h"
      42             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      43             : #include "llvm/CodeGen/MachineLoopInfo.h"
      44             : #include "llvm/CodeGen/MachineOperand.h"
      45             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      46             : #include "llvm/IR/DebugLoc.h"
      47             : #include "llvm/Pass.h"
      48             : #include "llvm/Support/Debug.h"
      49             : #include "llvm/Support/ErrorHandling.h"
      50             : #include "llvm/Support/raw_ostream.h"
      51             : #include <cassert>
      52             : #include <utility>
      53             : #include <vector>
      54             : 
      55             : using namespace llvm;
      56             : 
      57             : #define DEBUG_TYPE "vec-merger"
      58             : 
                       /// Reports whether \p Reg is produced by an implicit definition.
                       ///
                       /// Only the first instruction yielded by the def iterator is examined: the
                       /// loop body returns unconditionally. When \p Reg has no defining instruction
                       /// at all, the result is false if \p MRI reports the register as reserved;
                       /// any other def-less register reaches llvm_unreachable.
      59             : static bool
      60        8802 : isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
      61        8802 :   for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
      62       17604 :       E = MRI.def_instr_end(); It != E; ++It) {
                           // Unconditional return: further defs of Reg, if any, are never inspected.
      63       15054 :     return (*It).isImplicitDef();
      64             :   }
                         // Reaching this point means no defining instruction was found for Reg.
      65        1275 :   if (MRI.isReserved(Reg)) {
      66             :     return false;
      67             :   }
      68           0 :   llvm_unreachable("Reg without a def");
                         // Follows llvm_unreachable; presumably kept only to quiet
                         // missing-return warnings.
      69             :   return false;
      70             : }
      71             : 
      72             : namespace {
      73             : 
                       /// Summary of a single REG_SEQUENCE instruction: the instruction itself, the
                       /// channel assigned to each of its defined source registers, and the channels
                       /// whose source register is implicitly defined.
      74        9608 : class RegSeqInfo {
      75             : public:
                         /// The summarized instruction. It has no default member initializer, so a
                         /// default-constructed RegSeqInfo does not give it a defined value.
      76             :   MachineInstr *Instr;
                         /// Maps each source register that is not implicitly defined to the immediate
                         /// operand (channel) paired with it in the REG_SEQUENCE.
      77             :   DenseMap<unsigned, unsigned> RegToChan;
                         /// Channels whose source register satisfies isImplicitlyDef(), in operand
                         /// order.
      78             :   std::vector<unsigned> UndefReg;
      79             : 
                         /// Builds the summary of \p MI, which must be a REG_SEQUENCE (asserted).
                         ///
                         /// Operands are visited in (register, immediate) pairs starting at operand 1.
                         /// A register that appears in several pairs keeps only the immediate of the
                         /// last pair, because RegToChan is assigned through operator[].
      80        7158 :   RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
      81             :     assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE);
      82       11188 :     for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
      83       17604 :       MachineOperand &MO = Instr->getOperand(i);
      84       17604 :       unsigned Chan = Instr->getOperand(i + 1).getImm();
      85        8802 :       if (isImplicitlyDef(MRI, MO.getReg()))
      86         104 :         UndefReg.push_back(Chan);
      87             :       else
      88       17396 :         RegToChan[MO.getReg()] = Chan;
      89             :     }
      90        2386 :   }
      91             : 
                         /// Creates an empty summary; Instr is not given a value here.
      92        1830 :   RegSeqInfo() = default;
      93             : 
                         /// Two summaries compare equal when they refer to the same MachineInstr; the
                         /// channel data is not compared.
      94             :   bool operator==(const RegSeqInfo &RSI) const {
      95             :     return RSI.Instr == Instr;
      96             :   }
      97             : };
      98             : 
                       /// Machine function pass that tries to rebuild REG_SEQUENCE vectors on top of
                       /// previously seen ones within the same basic block, remapping the swizzles of
                       /// the instructions that consume them.
      99        1215 : class R600VectorRegMerger : public MachineFunctionPass {
     100             : private:
                         /// Index from an unsigned key to the instructions filed under that key.
     101             :   using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>;
     102             : 
                         /// Register info of the function being processed; assigned at the start of
                         /// runOnMachineFunction(). Unlike TII it has no default initializer.
     103             :   MachineRegisterInfo *MRI;
                         /// Instruction info of the current subtarget; assigned at the start of
                         /// runOnMachineFunction().
     104             :   const R600InstrInfo *TII = nullptr;
                         /// Summary recorded by trackRSI() for each tracked instruction.
     105             :   DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
                         /// Tracked instructions keyed by each register found in their RegToChan.
     106             :   InstructionSetMap PreviousRegSeqByReg;
                         /// Tracked instructions keyed by the size of their UndefReg list.
     107             :   InstructionSetMap PreviousRegSeqByUndefCount;
     108             : 
                         /// True when \p MI is a TEX_INST-flagged instruction or an export-swizzle
                         /// opcode.
     109             :   bool canSwizzle(const MachineInstr &MI) const;
                         /// True when every instruction using \p Reg satisfies canSwizzle().
     110             :   bool areAllUsesSwizzeable(unsigned Reg) const;
                         /// Rewrites the four swizzle immediates of the instruction according to
                         /// \p RemapChan.
     111             :   void SwizzleInput(MachineInstr &,
     112             :       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
                         /// Computes in \p Remap the channel reassignment that places ToMerge's
                         /// registers into Untouched; returns false when Untouched lacks undef
                         /// channels.
     113             :   bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
     114             :       std::vector<std::pair<unsigned, unsigned>> &Remap) const;
                         /// Looks for a merge candidate among tracked instructions that share a
                         /// register with \p RSI.
     115             :   bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
     116             :       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
                         /// Looks for a merge candidate among tracked instructions filed under the
                         /// undef count 4 - RSI.UndefReg.size().
     117             :   bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
     118             :       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
                         /// Replaces the instruction described by the first argument with
                         /// INSERT_SUBREGs chained onto \p BaseVec plus a final COPY, and returns that
                         /// COPY.
     119             :   MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
     120             :       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
                         /// Drops the instruction from the per-register and per-undef-count indexes.
     121             :   void RemoveMI(MachineInstr *);
                         /// Records \p RSI in all three tracking maps.
     122             :   void trackRSI(const RegSeqInfo &RSI);
     123             : 
     124             : public:
                         /// Pass identifier; defined after the class.
     125             :   static char ID;
     126             : 
     127         976 :   R600VectorRegMerger() : MachineFunctionPass(ID) {}
     128             : 
                         /// Declares that the pass preserves the CFG and both requires and preserves
                         /// MachineDominatorTree and MachineLoopInfo.
     129         244 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     130         244 :     AU.setPreservesCFG();
     131         244 :     AU.addRequired<MachineDominatorTree>();
     132         244 :     AU.addPreserved<MachineDominatorTree>();
     133         244 :     AU.addRequired<MachineLoopInfo>();
     134         244 :     AU.addPreserved<MachineLoopInfo>();
     135         244 :     MachineFunctionPass::getAnalysisUsage(AU);
     136         244 :   }
     137             : 
                         /// Name reported for the pass. It differs from the "R600 Vector Reg Merger"
                         /// description used in the INITIALIZE_PASS macros.
     138         244 :   StringRef getPassName() const override {
     139         244 :     return "R600 Vector Registers Merge Pass";
     140             :   }
     141             : 
                         /// Runs the merger over every basic block of \p Fn.
     142             :   bool runOnMachineFunction(MachineFunction &Fn) override;
     143             : };
     144             : 
     145             : } // end anonymous namespace
     146             : 
                       // Registers the pass under the DEBUG_TYPE name ("vec-merger") with the
                       // description "R600 Vector Reg Merger". No dependencies are listed between the
                       // BEGIN and END macros.
     147       53042 : INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE,
     148             :                      "R600 Vector Reg Merger", false, false)
     149      312538 : INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE,
     150             :                     "R600 Vector Reg Merger", false, false)
     151             : 
                       // Storage for the pass ID declared in the class.
     152             : char R600VectorRegMerger::ID = 0;
     153             : 
                       // Exposes the pass ID under the llvm namespace; the declaration of
                       // R600VectorRegMergerID is not in this file.
     154             : char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID;
     155             : 
                       /// Reports whether \p MI is one of the instructions this pass treats as
                       /// swizzlable: anything whose TSFlags carry R600_InstFlag::TEX_INST, or the
                       /// R600_ExportSwz / EG_ExportSwz opcodes.
     156             : bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
     157             :     const {
     158        7158 :   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
     159             :     return true;
     160        2134 :   switch (MI.getOpcode()) {
     161             :   case AMDGPU::R600_ExportSwz:
     162             :   case AMDGPU::EG_ExportSwz:
     163             :     return true;
     164             :   default:
     165             :     return false;
     166             :   }
     167             : }
     168             : 
                       /// Computes how the channels of \p ToMerge map onto \p Untouched.
                       ///
                       /// For every (register, channel) entry of ToMerge->RegToChan a pair
                       /// (channel in ToMerge, channel in Untouched) is appended to \p Remap. A
                       /// register that Untouched already holds reuses Untouched's channel; any
                       /// other register takes the next not-yet-used entry of Untouched->UndefReg.
                       ///
                       /// \returns false as soon as a register needs an undef channel and none is
                       /// left, true otherwise. \p Remap is appended to (never cleared here), so on a
                       /// false return it still holds the pairs pushed before the failure.
     169           5 : bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
     170             :     RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
     171             :     const {
                         // Index of the next Untouched->UndefReg entry to hand out.
     172           5 :   unsigned CurrentUndexIdx = 0;
     173           5 :   for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
     174          24 :       E = ToMerge->RegToChan.end(); It != E; ++It) {
     175             :     DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
     176           9 :         Untouched->RegToChan.find((*It).first);
                           // Register shared by both vectors: point at the channel Untouched uses.
     177          23 :     if (PosInUntouched != Untouched->RegToChan.end()) {
     178          20 :       Remap.push_back(std::pair<unsigned, unsigned>
     179          10 :           ((*It).second, (*PosInUntouched).second));
     180           5 :       continue;
     181             :     }
                           // Register only in ToMerge: it needs one of Untouched's undef channels.
     182           8 :     if (CurrentUndexIdx >= Untouched->UndefReg.size())
     183           0 :       return false;
     184          12 :     Remap.push_back(std::pair<unsigned, unsigned>
     185           8 :         ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
     186             :   }
     187             : 
     188             :   return true;
     189             : }
     190             : 
                       /// Returns the second member of the first pair in \p RemapChan whose first
                       /// member equals \p Chan. Reaches llvm_unreachable when no pair matches.
     191             : static
     192             : unsigned getReassignedChan(
     193             :     const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
     194             :     unsigned Chan) {
     195          22 :   for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
     196          26 :     if (RemapChan[j].first == Chan)
     197           9 :       return RemapChan[j].second;
     198             :   }
     199           0 :   llvm_unreachable("Chan wasn't reassigned");
     200             : }
     201             : 
                       /// Rebuilds the vector described by \p RSI on top of the one produced by
                       /// \p BaseRSI.
                       ///
                       /// One INSERT_SUBREG is emitted, before RSI's instruction, for every entry of
                       /// RSI->RegToChan; each inserts the entry's register at the channel that
                       /// \p RemapChan assigns to the entry's current channel, chaining from
                       /// BaseRSI's result register. A final COPY writes the last chained value into
                       /// the register RSI's instruction defined. Every instruction using that
                       /// register then has its swizzle immediates remapped through SwizzleInput(),
                       /// the original instruction is erased, and \p RSI is updated to describe the
                       /// COPY.
                       ///
                       /// \returns the newly created COPY instruction.
     202           5 : MachineInstr *R600VectorRegMerger::RebuildVector(
     203             :     RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
     204             :     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
                         // Register defined by the instruction being replaced.
     205           5 :   unsigned Reg = RSI->Instr->getOperand(0).getReg();
                         // New instructions are inserted in front of the instruction being replaced.
     206          10 :   MachineBasicBlock::iterator Pos = RSI->Instr;
     207           5 :   MachineBasicBlock &MBB = *Pos->getParent();
     208          15 :   DebugLoc DL = Pos->getDebugLoc();
     209             : 
                         // Start from the base vector and from copies of its channel bookkeeping.
     210           5 :   unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
     211          15 :   DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
     212          10 :   std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
     213           5 :   for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
     214          24 :       E = RSI->RegToChan.end(); It != E; ++It) {
     215           9 :     unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
                           // SubReg holds the map key (the register being inserted); Swizzle holds the
                           // channel it occupies in RSI.
     216           9 :     unsigned SubReg = (*It).first;
     217           9 :     unsigned Swizzle = (*It).second;
     218           9 :     unsigned Chan = getReassignedChan(RemapChan, Swizzle);
     219             : 
     220          27 :     MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG),
     221          18 :         DstReg)
     222           9 :         .addReg(SrcVec)
     223           9 :         .addReg(SubReg)
     224          27 :         .addImm(Chan);
     225           9 :     UpdatedRegToChan[SubReg] = Chan;
                           // Chan is now occupied, so drop it from the undef list if it was there.
     226           9 :     std::vector<unsigned>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
     227          18 :     if (ChanPos != UpdatedUndef.end())
     228           8 :       UpdatedUndef.erase(ChanPos);
     229             :     assert(!is_contained(UpdatedUndef, Chan) &&
     230             :            "UpdatedUndef shouldn't contain Chan more than once!");
     231             :     DEBUG(dbgs() << "    ->"; Tmp->dump(););
                           // Tmp is otherwise only referenced inside DEBUG().
     232             :     (void)Tmp;
                           // The next INSERT_SUBREG chains onto the value just produced.
     233           9 :     SrcVec = DstReg;
     234             :   }
     235             :   MachineInstr *NewMI =
     236          10 :       BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec);
     237             :   DEBUG(dbgs() << "    ->"; NewMI->dump(););
     238             : 
     239             :   DEBUG(dbgs() << "  Updating Swizzle:\n");
     240           5 :   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
     241          15 :       E = MRI->use_instr_end(); It != E; ++It) {
     242             :     DEBUG(dbgs() << "    ";(*It).dump(); dbgs() << "    ->");
     243           5 :     SwizzleInput(*It, RemapChan);
     244             :     DEBUG((*It).dump());
     245             :   }
     246           5 :   RSI->Instr->eraseFromParent();
     247             : 
     248             :   // Make RSI describe the COPY and the merged channel layout.
     249           5 :   RSI->Instr = NewMI;
     250          10 :   RSI->RegToChan = UpdatedRegToChan;
     251           5 :   RSI->UndefReg = UpdatedUndef;
     252             : 
     253          10 :   return NewMI;
     254             : }
     255             : 
                       /// Removes \p MI from every list of PreviousRegSeqByReg and
                       /// PreviousRegSeqByUndefCount. PreviousRegSeq is not modified here.
                       ///
                       /// NOTE(review): erase(find(MIs, MI), MIs.end()) removes \p MI *and every
                       /// element stored after it* in each list, not just \p MI. When \p MI is absent
                       /// the call is a no-op. If only \p MI was meant to be dropped this should be a
                       /// single-element erase or an erase-remove; confirm the intent before
                       /// changing it, since that would alter which merges are attempted.
     256         281 : void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
     257         281 :   for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
     258       19816 :       E = PreviousRegSeqByReg.end(); It != E; ++It) {
     259       18973 :     std::vector<MachineInstr *> &MIs = (*It).second;
     260       94865 :     MIs.erase(llvm::find(MIs, MI), MIs.end());
     261             :   }
     262         281 :   for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
     263        1369 :       E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
     264         526 :     std::vector<MachineInstr *> &MIs = (*It).second;
     265        2630 :     MIs.erase(llvm::find(MIs, MI), MIs.end());
     266             :   }
     267         281 : }
     268             : 
                       /// Rewrites the four swizzle immediates of \p MI according to \p RemapChan.
                       ///
                       /// The immediates are read from four consecutive operands starting at operand
                       /// 2 for TEX_INST-flagged instructions and at operand 3 for everything else.
                       /// Each immediate plus one is looked up among the first members of
                       /// \p RemapChan; on the first match the operand is set to the matching second
                       /// member minus one. Immediates without a match are left unchanged.
     269           5 : void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
     270             :     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
     271             :   unsigned Offset;
     272          15 :   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
     273             :     Offset = 2;
     274             :   else
     275           3 :     Offset = 3;
     276          25 :   for (unsigned i = 0; i < 4; i++) {
                           // NOTE(review): the +1 here and the -1 below presumably convert between
                           // 0-based swizzle selectors and the channel values stored in RemapChan;
                           // confirm against the sub-register index numbering.
     277          40 :     unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
     278          55 :     for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
     279          60 :       if (RemapChan[j].first == Swizzle) {
     280          15 :         MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
     281             :         break;
     282             :       }
     283             :     }
     284             :   }
     285           5 : }
     286             : 
                       /// Returns true when every instruction using \p Reg satisfies canSwizzle();
                       /// a register without any use therefore also yields true.
     287        2386 : bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
     288        2386 :   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
     289        5077 :       E = MRI->use_instr_end(); It != E; ++It) {
     290        2386 :     if (!canSwizzle(*It))
     291             :       return false;
     292             :   }
     293             :   return true;
     294             : }
     295             : 
                       /// Searches the tracked instructions that share a register with \p RSI for
                       /// one \p RSI can be merged into.
                       ///
                       /// Every register operand of RSI.Instr is looked up in PreviousRegSeqByReg.
                       /// Each instruction filed there has its summary copied into
                       /// \p CompatibleRSI and is tested with tryMergeVector(), which fills
                       /// \p RemapChan.
                       ///
                       /// \returns true on the first successful candidate. On a false return
                       /// \p CompatibleRSI may still have been overwritten by a rejected candidate.
                       ///
                       /// NOTE(review): \p RemapChan is not cleared between candidates, and
                       /// tryMergeVector() appends before it can fail, so pairs from a rejected
                       /// candidate would precede those of a later accepted one. Confirm whether a
                       /// failure can actually occur on this path.
     296         305 : bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
     297             :     RegSeqInfo &CompatibleRSI,
     298             :     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
     299        3026 :   for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
     300         610 :       MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
                           // Immediate (channel) operands carry no register to look up.
     301        2725 :     if (!MOp->isReg())
     302        1208 :       continue;
                           // NOTE(review): the lookup uses operator[], which presumably creates an
                           // empty entry for registers not yet tracked; RemoveMI() walks every
                           // entry of this map. Confirm whether find() was intended.
     303        4551 :     if (PreviousRegSeqByReg[MOp->getReg()].empty())
     304        1513 :       continue;
     305          20 :     for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
     306           8 :       CompatibleRSI = PreviousRegSeq[MI];
                             // Never merge an instruction with itself.
     307           4 :       if (RSI == CompatibleRSI)
     308           0 :         continue;
     309           4 :       if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
     310           4 :         return true;
     311             :     }
     312             :   }
     313             :   return false;
     314             : }
     315             : 
                       /// Picks a merge candidate purely by its number of undef channels.
                       ///
                       /// The most recently tracked instruction filed under the undef count
                       /// 4 - RSI.UndefReg.size() is copied into \p CompatibleRSI and handed to
                       /// tryMergeVector() to fill \p RemapChan.
                       ///
                       /// \returns false when no instruction is filed under that count, true
                       /// otherwise.
                       ///
                       /// NOTE(review): the result of tryMergeVector() is discarded and true is
                       /// returned regardless. This presumably relies on the candidate always having
                       /// enough undef channels for RSI's registers (4 looks like the vector width);
                       /// confirm, since a failed remap would leave \p RemapChan incomplete.
     316         301 : bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
     317             :     RegSeqInfo &CompatibleRSI,
     318             :     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
     319         602 :   unsigned NeededUndefs = 4 - RSI.UndefReg.size();
     320         903 :   if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
     321             :     return false;
     322             :   std::vector<MachineInstr *> &MIs =
     323           2 :       PreviousRegSeqByUndefCount[NeededUndefs];
                         // The last element is the most recently tracked instruction with this count.
     324           2 :   CompatibleRSI = PreviousRegSeq[MIs.back()];
     325           1 :   tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
     326           1 :   return true;
     327             : }
     328             : 
                       /// Records \p RSI so later instructions can be merged into it: its instruction
                       /// is appended to the PreviousRegSeqByReg list of every register in
                       /// RSI.RegToChan and to the PreviousRegSeqByUndefCount list keyed by
                       /// RSI.UndefReg.size(), and a copy of \p RSI is stored in PreviousRegSeq.
     329         305 : void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
     330             :   for (DenseMap<unsigned, unsigned>::const_iterator
     331         610 :   It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
     332        2260 :     PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
     333             :   }
     334         915 :   PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
     335         610 :   PreviousRegSeq[RSI.Instr] = RSI;
     336         305 : }
     337             : 
                       /// Runs the merger over each basic block of \p Fn independently.
                       ///
                       /// Tracking state is reset at the start of every block. Each REG_SEQUENCE
                       /// whose result is used only by swizzlable instructions is first matched
                       /// against tracked vectors sharing a register, then against tracked vectors
                       /// with a suitable undef count; on a match it is rebuilt with
                       /// RebuildVector(). Whether merged or not, the instruction is then tracked.
                       ///
                       /// NOTE(review): the function returns false on every path, including after
                       /// RebuildVector() has inserted and erased instructions. A pass that changes
                       /// the function is normally expected to return true; confirm and consider
                       /// tracking a Changed flag.
     338        2057 : bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
     339        2057 :   if (skipFunction(*Fn.getFunction()))
     340             :     return false;
     341             : 
     342        2057 :   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
     343        2057 :   TII = ST.getInstrInfo();
     344        2057 :   MRI = &Fn.getRegInfo();
     345             : 
     346        4114 :   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
     347        4287 :        MBB != MBBe; ++MBB) {
     348        2230 :     MachineBasicBlock *MB = &*MBB;
                           // Candidates are only searched within the current block.
     349        2230 :     PreviousRegSeq.clear();
     350        2230 :     PreviousRegSeqByReg.clear();
     351        2230 :     PreviousRegSeqByUndefCount.clear();
     352             : 
     353        4460 :     for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
     354       60328 :          MII != MIIE; ++MII) {
     355       58098 :       MachineInstr &MI = *MII;
     356       58098 :       if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) {
                               // A TEX_INST-flagged instruction stops the instructions defining its
                               // operand 1 from being used as merge bases (see RemoveMI()).
     357      111424 :         if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
     358         276 :           unsigned Reg = MI.getOperand(1).getReg();
     359             :           for (MachineRegisterInfo::def_instr_iterator
     360         552 :                It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
     361         552 :                It != E; ++It) {
     362         276 :             RemoveMI(&(*It));
     363             :           }
     364             :         }
     365      113510 :         continue;
     366             :       }
     367             : 
     368        2686 :       RegSeqInfo RSI(*MRI, &MI);
     369             : 
     370             :       // Skip (and do not track) vectors with a use that cannot be swizzled.
     371        2386 :       unsigned Reg = MI.getOperand(0).getReg();
     372        2386 :       if (!areAllUsesSwizzeable(Reg))
     373        4167 :         continue;
     374             : 
     375             :       DEBUG({
     376             :         dbgs() << "Trying to optimize ";
     377             :         MI.dump();
     378             :       });
     379             : 
     380         605 :       RegSeqInfo CandidateRSI;
     381         605 :       std::vector<std::pair<unsigned, unsigned>> RemapChan;
     382             :       DEBUG(dbgs() << "Using common slots...\n";);
     383         309 :       if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
     384             :         // Stop offering the chosen base vector as a candidate.
     385           4 :         RemoveMI(CandidateRSI.Instr);
                               // MI was erased by RebuildVector(); resume from the COPY it returned.
     386           8 :         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
     387           4 :         trackRSI(RSI);
     388           9 :         continue;
     389             :       }
     390             :       DEBUG(dbgs() << "Using free slots...\n";);
                             // Discard any pairs left behind by the failed common-slot attempt.
     391         301 :       RemapChan.clear();
     392         302 :       if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
     393           1 :         RemoveMI(CandidateRSI.Instr);
     394           2 :         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
     395           1 :         trackRSI(RSI);
     396           1 :         continue;
     397             :       }
     398             :       // Failed to merge: track the unchanged vector as a future candidate.
     399         300 :       trackRSI(RSI);
     400             :     }
     401             :   }
     402             :   return false;
     403             : }
     404             : 
                       /// Creates a new R600VectorRegMerger pass instance. The object is allocated
                       /// with new; presumably the pass manager it is added to takes ownership.
     405         244 : llvm::FunctionPass *llvm::createR600VectorRegMerger() {
     406         244 :   return new R600VectorRegMerger();
     407             : }

Generated by: LCOV version 1.13