LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600OptimizeVectorRegisters.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 148 152 97.4 %
Date: 2018-07-13 00:08:38 Functions: 21 21 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- R600OptimizeVectorRegisters.cpp ------------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass merges the inputs of swizzlable instructions into vectors that
      12             : /// share common data and/or have enough undef subregs, using swizzle abilities.
      13             : ///
      14             : /// For instance let's consider the following pseudo code :
      15             : /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
      16             : /// ...
      17             : /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
      18             : /// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
      19             : ///
      20             : /// is turned into :
      21             : /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
      22             : /// ...
      23             : /// %7 = INSERT_SUBREG %4, sub3
      24             : /// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
      25             : ///
      26             : /// This allows regalloc to reduce register pressure for vector registers and
      27             : /// to reduce MOV count.
      28             : //===----------------------------------------------------------------------===//
      29             : 
      30             : #include "AMDGPU.h"
      31             : #include "AMDGPUSubtarget.h"
      32             : #include "R600Defines.h"
      33             : #include "R600InstrInfo.h"
      34             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      35             : #include "llvm/ADT/DenseMap.h"
      36             : #include "llvm/ADT/STLExtras.h"
      37             : #include "llvm/ADT/StringRef.h"
      38             : #include "llvm/CodeGen/MachineBasicBlock.h"
      39             : #include "llvm/CodeGen/MachineDominators.h"
      40             : #include "llvm/CodeGen/MachineFunction.h"
      41             : #include "llvm/CodeGen/MachineFunctionPass.h"
      42             : #include "llvm/CodeGen/MachineInstr.h"
      43             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      44             : #include "llvm/CodeGen/MachineLoopInfo.h"
      45             : #include "llvm/CodeGen/MachineOperand.h"
      46             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      47             : #include "llvm/IR/DebugLoc.h"
      48             : #include "llvm/Pass.h"
      49             : #include "llvm/Support/Debug.h"
      50             : #include "llvm/Support/ErrorHandling.h"
      51             : #include "llvm/Support/raw_ostream.h"
      52             : #include <cassert>
      53             : #include <utility>
      54             : #include <vector>
      55             : 
      56             : using namespace llvm;
      57             : 
      58             : #define DEBUG_TYPE "vec-merger"
      59             : 
      60             : static bool
      61        8940 : isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
      62        8940 :   for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
      63        8940 :       E = MRI.def_instr_end(); It != E; ++It) {
      64             :     return (*It).isImplicitDef();
      65             :   }
      66        1295 :   if (MRI.isReserved(Reg)) {
      67             :     return false;
      68             :   }
      69           0 :   llvm_unreachable("Reg without a def");
      70             :   return false;
      71             : }
      72             : 
      73             : namespace {
      74             : 
      75        6700 : class RegSeqInfo {
      76             : public:
      77             :   MachineInstr *Instr;
      78             :   DenseMap<unsigned, unsigned> RegToChan;
      79             :   std::vector<unsigned> UndefReg;
      80             : 
      81        4860 :   RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
      82             :     assert(MI->getOpcode() == R600::REG_SEQUENCE);
      83       11370 :     for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
      84        8940 :       MachineOperand &MO = Instr->getOperand(i);
      85       17880 :       unsigned Chan = Instr->getOperand(i + 1).getImm();
      86        8940 :       if (isImplicitlyDef(MRI, MO.getReg()))
      87         104 :         UndefReg.push_back(Chan);
      88             :       else
      89       17672 :         RegToChan[MO.getReg()] = Chan;
      90             :     }
      91        2430 :   }
      92             : 
      93         305 :   RegSeqInfo() = default;
      94             : 
      95             :   bool operator==(const RegSeqInfo &RSI) const {
      96             :     return RSI.Instr == Instr;
      97             :   }
      98             : };
      99             : 
     100        1385 : class R600VectorRegMerger : public MachineFunctionPass {
     101             : private:
     102             :   using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>;
     103             : 
     104             :   MachineRegisterInfo *MRI;
     105             :   const R600InstrInfo *TII = nullptr;
     106             :   DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
     107             :   InstructionSetMap PreviousRegSeqByReg;
     108             :   InstructionSetMap PreviousRegSeqByUndefCount;
     109             : 
     110             :   bool canSwizzle(const MachineInstr &MI) const;
     111             :   bool areAllUsesSwizzeable(unsigned Reg) const;
     112             :   void SwizzleInput(MachineInstr &,
     113             :       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
     114             :   bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
     115             :       std::vector<std::pair<unsigned, unsigned>> &Remap) const;
     116             :   bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
     117             :       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
     118             :   bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
     119             :       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
     120             :   MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
     121             :       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
     122             :   void RemoveMI(MachineInstr *);
     123             :   void trackRSI(const RegSeqInfo &RSI);
     124             : 
     125             : public:
     126             :   static char ID;
     127             : 
     128        1112 :   R600VectorRegMerger() : MachineFunctionPass(ID) {}
     129             : 
     130         278 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     131         278 :     AU.setPreservesCFG();
     132             :     AU.addRequired<MachineDominatorTree>();
     133             :     AU.addPreserved<MachineDominatorTree>();
     134             :     AU.addRequired<MachineLoopInfo>();
     135             :     AU.addPreserved<MachineLoopInfo>();
     136         278 :     MachineFunctionPass::getAnalysisUsage(AU);
     137         278 :   }
     138             : 
     139         278 :   StringRef getPassName() const override {
     140         278 :     return "R600 Vector Registers Merge Pass";
     141             :   }
     142             : 
     143             :   bool runOnMachineFunction(MachineFunction &Fn) override;
     144             : };
     145             : 
     146             : } // end anonymous namespace
     147             : 
     148       73254 : INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE,
     149             :                      "R600 Vector Reg Merger", false, false)
     150      342570 : INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE,
     151             :                     "R600 Vector Reg Merger", false, false)
     152             : 
     153             : char R600VectorRegMerger::ID = 0;
     154             : 
     155             : char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID;
     156             : 
     157             : bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
     158             :     const {
     159        7290 :   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
     160             :     return true;
     161        2178 :   switch (MI.getOpcode()) {
     162             :   case R600::R600_ExportSwz:
     163             :   case R600::EG_ExportSwz:
     164             :     return true;
     165             :   default:
     166             :     return false;
     167             :   }
     168             : }
     169             : 
     170           5 : bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
     171             :     RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
     172             :     const {
     173             :   unsigned CurrentUndexIdx = 0;
     174           5 :   for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
     175          14 :       E = ToMerge->RegToChan.end(); It != E; ++It) {
     176             :     DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
     177           9 :         Untouched->RegToChan.find((*It).first);
     178          14 :     if (PosInUntouched != Untouched->RegToChan.end()) {
     179           5 :       Remap.push_back(std::pair<unsigned, unsigned>
     180             :           ((*It).second, (*PosInUntouched).second));
     181           5 :       continue;
     182             :     }
     183           8 :     if (CurrentUndexIdx >= Untouched->UndefReg.size())
     184           0 :       return false;
     185           4 :     Remap.push_back(std::pair<unsigned, unsigned>
     186           4 :         ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
     187             :   }
     188             : 
     189             :   return true;
     190             : }
     191             : 
     192             : static
     193             : unsigned getReassignedChan(
     194             :     const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
     195             :     unsigned Chan) {
     196          22 :   for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
     197          26 :     if (RemapChan[j].first == Chan)
     198           9 :       return RemapChan[j].second;
     199             :   }
     200           0 :   llvm_unreachable("Chan wasn't reassigned");
     201             : }
     202             : 
     203           5 : MachineInstr *R600VectorRegMerger::RebuildVector(
     204             :     RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
     205             :     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
     206           5 :   unsigned Reg = RSI->Instr->getOperand(0).getReg();
     207             :   MachineBasicBlock::iterator Pos = RSI->Instr;
     208           5 :   MachineBasicBlock &MBB = *Pos->getParent();
     209             :   DebugLoc DL = Pos->getDebugLoc();
     210             : 
     211           5 :   unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
     212           5 :   DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
     213           5 :   std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
     214           5 :   for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
     215          14 :       E = RSI->RegToChan.end(); It != E; ++It) {
     216          18 :     unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
     217           9 :     unsigned SubReg = (*It).first;
     218           9 :     unsigned Swizzle = (*It).second;
     219           9 :     unsigned Chan = getReassignedChan(RemapChan, Swizzle);
     220             : 
     221          27 :     MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
     222           9 :         DstReg)
     223           9 :         .addReg(SrcVec)
     224           9 :         .addReg(SubReg)
     225          18 :         .addImm(Chan);
     226           9 :     UpdatedRegToChan[SubReg] = Chan;
     227             :     std::vector<unsigned>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
     228           9 :     if (ChanPos != UpdatedUndef.end())
     229             :       UpdatedUndef.erase(ChanPos);
     230             :     assert(!is_contained(UpdatedUndef, Chan) &&
     231             :            "UpdatedUndef shouldn't contain Chan more than once!");
     232             :     LLVM_DEBUG(dbgs() << "    ->"; Tmp->dump(););
     233             :     (void)Tmp;
     234             :     SrcVec = DstReg;
     235             :   }
     236             :   MachineInstr *NewMI =
     237          10 :       BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
     238             :   LLVM_DEBUG(dbgs() << "    ->"; NewMI->dump(););
     239             : 
     240             :   LLVM_DEBUG(dbgs() << "  Updating Swizzle:\n");
     241          10 :   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
     242          10 :       E = MRI->use_instr_end(); It != E; ++It) {
     243             :     LLVM_DEBUG(dbgs() << "    "; (*It).dump(); dbgs() << "    ->");
     244           5 :     SwizzleInput(*It, RemapChan);
     245             :     LLVM_DEBUG((*It).dump());
     246             :   }
     247           5 :   RSI->Instr->eraseFromParent();
     248             : 
     249             :   // Update RSI
     250           5 :   RSI->Instr = NewMI;
     251             :   RSI->RegToChan = UpdatedRegToChan;
     252           5 :   RSI->UndefReg = UpdatedUndef;
     253             : 
     254           5 :   return NewMI;
     255             : }
     256             : 
     257         281 : void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
     258         281 :   for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
     259       19254 :       E = PreviousRegSeqByReg.end(); It != E; ++It) {
     260       18973 :     std::vector<MachineInstr *> &MIs = (*It).second;
     261             :     MIs.erase(llvm::find(MIs, MI), MIs.end());
     262             :   }
     263         281 :   for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
     264         807 :       E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
     265         526 :     std::vector<MachineInstr *> &MIs = (*It).second;
     266             :     MIs.erase(llvm::find(MIs, MI), MIs.end());
     267             :   }
     268         281 : }
     269             : 
     270           5 : void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
     271             :     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
     272             :   unsigned Offset;
     273          15 :   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
     274             :     Offset = 2;
     275             :   else
     276             :     Offset = 3;
     277          45 :   for (unsigned i = 0; i < 4; i++) {
     278          40 :     unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
     279          55 :     for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
     280          60 :       if (RemapChan[j].first == Swizzle) {
     281          15 :         MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
     282             :         break;
     283             :       }
     284             :     }
     285             :   }
     286           5 : }
     287             : 
     288        2430 : bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
     289        2735 :   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
     290        2735 :       E = MRI->use_instr_end(); It != E; ++It) {
     291        2430 :     if (!canSwizzle(*It))
     292        2125 :       return false;
     293             :   }
     294         305 :   return true;
     295             : }
     296             : 
     297         305 : bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
     298             :     RegSeqInfo &CompatibleRSI,
     299             :     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
     300        3026 :   for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
     301        3331 :       MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
     302        2725 :     if (!MOp->isReg())
     303        1208 :       continue;
     304        3034 :     if (PreviousRegSeqByReg[MOp->getReg()].empty())
     305        1513 :       continue;
     306          12 :     for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
     307           8 :       CompatibleRSI = PreviousRegSeq[MI];
     308           4 :       if (RSI == CompatibleRSI)
     309           0 :         continue;
     310           4 :       if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
     311           4 :         return true;
     312             :     }
     313             :   }
     314             :   return false;
     315             : }
     316             : 
     317         301 : bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
     318             :     RegSeqInfo &CompatibleRSI,
     319             :     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
     320         602 :   unsigned NeededUndefs = 4 - RSI.UndefReg.size();
     321         602 :   if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
     322             :     return false;
     323             :   std::vector<MachineInstr *> &MIs =
     324             :       PreviousRegSeqByUndefCount[NeededUndefs];
     325           2 :   CompatibleRSI = PreviousRegSeq[MIs.back()];
     326           1 :   tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
     327           1 :   return true;
     328             : }
     329             : 
     330         305 : void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
     331             :   for (DenseMap<unsigned, unsigned>::const_iterator
     332        1740 :   It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
     333        2260 :     PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
     334             :   }
     335         915 :   PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
     336         610 :   PreviousRegSeq[RSI.Instr] = RSI;
     337         305 : }
     338             : 
     339        2238 : bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
     340        2238 :   if (skipFunction(Fn.getFunction()))
     341             :     return false;
     342             : 
     343        2238 :   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
     344        2238 :   TII = ST.getInstrInfo();
     345        2238 :   MRI = &Fn.getRegInfo();
     346             : 
     347             :   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
     348        4653 :        MBB != MBBe; ++MBB) {
     349             :     MachineBasicBlock *MB = &*MBB;
     350        2415 :     PreviousRegSeq.clear();
     351        2415 :     PreviousRegSeqByReg.clear();
     352        2415 :     PreviousRegSeqByUndefCount.clear();
     353             : 
     354        2415 :     for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
     355       62475 :          MII != MIIE; ++MII) {
     356             :       MachineInstr &MI = *MII;
     357      120120 :       if (MI.getOpcode() != R600::REG_SEQUENCE) {
     358      115260 :         if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
     359         276 :           unsigned Reg = MI.getOperand(1).getReg();
     360             :           for (MachineRegisterInfo::def_instr_iterator
     361         276 :                It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
     362         552 :                It != E; ++It) {
     363         276 :             RemoveMI(&(*It));
     364             :           }
     365             :         }
     366      117390 :         continue;
     367             :       }
     368             : 
     369        2730 :       RegSeqInfo RSI(*MRI, &MI);
     370             : 
     371             :       // All uses of MI are swizzeable ?
     372        2430 :       unsigned Reg = MI.getOperand(0).getReg();
     373        2430 :       if (!areAllUsesSwizzeable(Reg))
     374        4255 :         continue;
     375             : 
     376             :       LLVM_DEBUG({
     377             :         dbgs() << "Trying to optimize ";
     378             :         MI.dump();
     379             :       });
     380             : 
     381         300 :       RegSeqInfo CandidateRSI;
     382             :       std::vector<std::pair<unsigned, unsigned>> RemapChan;
     383             :       LLVM_DEBUG(dbgs() << "Using common slots...\n";);
     384         309 :       if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
     385             :         // Remove CandidateRSI mapping
     386           4 :         RemoveMI(CandidateRSI.Instr);
     387           4 :         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
     388           4 :         trackRSI(RSI);
     389           4 :         continue;
     390             :       }
     391             :       LLVM_DEBUG(dbgs() << "Using free slots...\n";);
     392             :       RemapChan.clear();
     393         302 :       if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
     394           1 :         RemoveMI(CandidateRSI.Instr);
     395           1 :         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
     396           1 :         trackRSI(RSI);
     397           1 :         continue;
     398             :       }
     399             :       //Failed to merge
     400         300 :       trackRSI(RSI);
     401             :     }
     402             :   }
     403             :   return false;
     404             : }
     405             : 
     406         278 : llvm::FunctionPass *llvm::createR600VectorRegMerger() {
     407         278 :   return new R600VectorRegMerger();
     408             : }

Generated by: LCOV version 1.13