LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600Packetizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 185 189 97.9 %
Date: 2017-09-14 15:23:50 Functions: 16 19 84.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass implements instruction packetization for R600. It unsets isLast
      12             : /// bit of instructions inside a bundle and substitutes src register with
      13             : /// PreviousVector when applicable.
      14             : //
      15             : //===----------------------------------------------------------------------===//
      16             : 
      17             : #include "AMDGPU.h"
      18             : #include "AMDGPUSubtarget.h"
      19             : #include "R600InstrInfo.h"
      20             : #include "llvm/CodeGen/DFAPacketizer.h"
      21             : #include "llvm/CodeGen/MachineDominators.h"
      22             : #include "llvm/CodeGen/MachineFunctionPass.h"
      23             : #include "llvm/CodeGen/MachineLoopInfo.h"
      24             : #include "llvm/CodeGen/Passes.h"
      25             : #include "llvm/CodeGen/ScheduleDAG.h"
      26             : #include "llvm/Support/Debug.h"
      27             : #include "llvm/Support/raw_ostream.h"
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : #define DEBUG_TYPE "packets"
      32             : 
      33             : namespace {
      34             : 
      35         243 : class R600Packetizer : public MachineFunctionPass {
      36             : 
      37             : public:
      38             :   static char ID;
      39         244 :   R600Packetizer() : MachineFunctionPass(ID) {}
      40             : 
      41         244 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      42         244 :     AU.setPreservesCFG();
      43         244 :     AU.addRequired<MachineDominatorTree>();
      44         244 :     AU.addPreserved<MachineDominatorTree>();
      45         244 :     AU.addRequired<MachineLoopInfo>();
      46         244 :     AU.addPreserved<MachineLoopInfo>();
      47         244 :     MachineFunctionPass::getAnalysisUsage(AU);
      48         244 :   }
      49             : 
      50         244 :   StringRef getPassName() const override { return "R600 Packetizer"; }
      51             : 
      52             :   bool runOnMachineFunction(MachineFunction &Fn) override;
      53             : };
      54             : 
      55        2057 : class R600PacketizerList : public VLIWPacketizerList {
      56             : private:
      57             :   const R600InstrInfo *TII;
      58             :   const R600RegisterInfo &TRI;
      59             :   bool VLIW5;
      60             :   bool ConsideredInstUsesAlreadyWrittenVectorElement;
      61             : 
      62             :   unsigned getSlot(const MachineInstr &MI) const {
      63      252613 :     return TRI.getHWRegChan(MI.getOperand(0).getReg());
      64             :   }
      65             : 
      66             :   /// \returns the register to PV chan mapping for the bundle or single
      67             :   /// instruction that immediately precedes I.
      68       44424 :   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
      69             :       const {
      70       44424 :     DenseMap<unsigned, unsigned> Result;
      71       88848 :     I--;
      72      193492 :     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
      73             :       return Result;
      74       39081 :     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
      75       78162 :     if (I->isBundle())
      76       24767 :       BI++;
      77             :     int LastDstChan = -1;
      78             :     do {
      79       96909 :       bool isTrans = false;
      80      193818 :       int BISlot = getSlot(*BI);
      81       96909 :       if (LastDstChan >= BISlot)
      82       17425 :         isTrans = true;
      83       96909 :       LastDstChan = BISlot;
      84      193818 :       if (TII->isPredicated(*BI))
      85       25124 :         continue;
      86      289401 :       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
      87      240859 :       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
      88         474 :         continue;
      89      287979 :       int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
      90      100074 :       if (DstIdx == -1) {
      91        4081 :         continue;
      92             :       }
      93      183824 :       unsigned Dst = BI->getOperand(DstIdx).getReg();
      94      184611 :       if (isTrans || TII->isTransOnly(*BI)) {
      95       18132 :         Result[Dst] = AMDGPU::PS;
      96       18132 :         continue;
      97             :       }
      98      221356 :       if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
      99       73772 :           BI->getOpcode() == AMDGPU::DOT4_eg) {
     100          24 :         Result[Dst] = AMDGPU::PV_X;
     101          24 :         continue;
     102             :       }
     103       75285 :       if (Dst == AMDGPU::OQAP) {
     104        1529 :         continue;
     105             :       }
     106       72227 :       unsigned PVReg = 0;
     107       72227 :       switch (TRI.getHWRegChan(Dst)) {
     108             :       case 0:
     109             :         PVReg = AMDGPU::PV_X;
     110             :         break;
     111       14162 :       case 1:
     112       14162 :         PVReg = AMDGPU::PV_Y;
     113       14162 :         break;
     114       15355 :       case 2:
     115       15355 :         PVReg = AMDGPU::PV_Z;
     116       15355 :         break;
     117       27984 :       case 3:
     118       27984 :         PVReg = AMDGPU::PV_W;
     119       27984 :         break;
     120           0 :       default:
     121           0 :         llvm_unreachable("Invalid Chan");
     122             :       }
     123       72227 :       Result[Dst] = PVReg;
     124      290727 :     } while ((++BI)->isBundledWithPred());
     125             :     return Result;
     126             :   }
     127             : 
     128       40915 :   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
     129             :       const {
     130       40915 :     unsigned Ops[] = {
     131             :       AMDGPU::OpName::src0,
     132             :       AMDGPU::OpName::src1,
     133             :       AMDGPU::OpName::src2
     134             :     };
     135      163660 :     for (unsigned i = 0; i < 3; i++) {
     136      245490 :       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
     137      122745 :       if (OperandIdx < 0)
     138       41786 :         continue;
     139      161918 :       unsigned Src = MI.getOperand(OperandIdx).getReg();
     140       80959 :       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
     141       80959 :       if (It != PVs.end())
     142       44562 :         MI.getOperand(OperandIdx).setReg(It->second);
     143             :     }
     144       40915 :   }
     145             : public:
     146             :   // Ctor.
     147             :   R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
     148             :                      MachineLoopInfo &MLI)
     149        2057 :       : VLIWPacketizerList(MF, MLI, nullptr),
     150        2057 :         TII(ST.getInstrInfo()),
     151        6171 :         TRI(TII->getRegisterInfo()) {
     152        2057 :     VLIW5 = !ST.hasCaymanISA();
     153             :   }
     154             : 
     155             :   // initPacketizerState - initialize some internal flags.
     156       58394 :   void initPacketizerState() override {
     157       58394 :     ConsideredInstUsesAlreadyWrittenVectorElement = false;
     158       58394 :   }
     159             : 
     160             :   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
     161       44424 :   bool ignorePseudoInstruction(const MachineInstr &MI,
     162             :                                const MachineBasicBlock *MBB) override {
     163       44424 :     return false;
     164             :   }
     165             : 
     166             :   // isSoloInstruction - return true if instruction MI can not be packetized
     167             :   // with any other instruction, which means that MI itself is a packet.
     168       58394 :   bool isSoloInstruction(const MachineInstr &MI) override {
     169       58394 :     if (TII->isVector(MI))
     170             :       return true;
     171      116788 :     if (!TII->isALUInstr(MI.getOpcode()))
     172             :       return true;
     173       95474 :     if (MI.getOpcode() == AMDGPU::GROUP_BARRIER)
     174             :       return true;
     175             :     // XXX: This can be removed once the packetizer properly handles all the
     176             :     // LDS instruction group restrictions.
     177       47733 :     return TII->isLDSInstr(MI.getOpcode());
     178             :   }
     179             : 
     180             :   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
     181             :   // together.
     182       51929 :   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
     183       51929 :     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
     184      155787 :     if (getSlot(*MII) == getSlot(*MIJ))
     185       11385 :       ConsideredInstUsesAlreadyWrittenVectorElement = true;
     186             :     // Do MII and MIJ share the same pred_sel?
     187      103858 :     int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
     188      103858 :         OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
     189      103858 :     unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
     190      103858 :         PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
     191       51929 :     if (PredI != PredJ)
     192             :       return false;
     193       51893 :     if (SUJ->isSucc(SUI)) {
     194       47595 :       for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
     195       62550 :         const SDep &Dep = SUJ->Succs[i];
     196       31275 :         if (Dep.getSUnit() != SUI)
     197       20599 :           continue;
     198       10676 :         if (Dep.getKind() == SDep::Anti)
     199        5652 :           continue;
     200        5024 :         if (Dep.getKind() == SDep::Output)
     201         738 :           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
     202          12 :             continue;
     203             :         return false;
     204             :       }
     205             :     }
     206             : 
     207             :     bool ARDef =
     208       46881 :         TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
     209             :     bool ARUse =
     210       46881 :         TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
     211             : 
     212       46881 :     return !ARDef || !ARUse;
     213             :   }
     214             : 
     215             :   // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
     216             :   // and SUJ.
     217        5153 :   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
     218        5153 :     return false;
     219             :   }
     220             : 
     221             :   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
     222       45730 :     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
     223       68595 :     MI->getOperand(LastOp).setImm(Bit);
     224             :   }
     225             : 
     226       44424 :   bool isBundlableWithCurrentPMI(MachineInstr &MI,
     227             :                                  const DenseMap<unsigned, unsigned> &PV,
     228             :                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
     229             :                                  bool &isTransSlot) {
     230       44424 :     isTransSlot = TII->isTransOnly(MI);
     231             :     assert (!isTransSlot || VLIW5);
     232             : 
     233             :     // Is the dst reg sequence legal ?
     234       87901 :     if (!isTransSlot && !CurrentPacketMIs.empty()) {
     235      103692 :       if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
     236       19978 :         if (ConsideredInstUsesAlreadyWrittenVectorElement &&
     237       19687 :             !TII->isVectorOnly(MI) && VLIW5) {
     238        8314 :           isTransSlot = true;
     239             :           DEBUG({
     240             :             dbgs() << "Considering as Trans Inst :";
     241             :             MI.dump();
     242             :           });
     243             :         }
     244             :         else
     245             :           return false;
     246             :       }
     247             :     }
     248             : 
     249             :     // Are the Constants limitations met ?
     250       83498 :     CurrentPacketMIs.push_back(&MI);
     251       41749 :     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
     252             :       DEBUG({
     253             :         dbgs() << "Couldn't pack :\n";
     254             :         MI.dump();
     255             :         dbgs() << "with the following packets :\n";
     256             :         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
     257             :           CurrentPacketMIs[i]->dump();
     258             :           dbgs() << "\n";
     259             :         }
     260             :         dbgs() << "because of Consts read limitations\n";
     261             :       });
     262         321 :       CurrentPacketMIs.pop_back();
     263         321 :       return false;
     264             :     }
     265             : 
     266             :     // Is there a BankSwizzle set that meets Read Port limitations?
     267       82856 :     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
     268       41428 :             PV, BS, isTransSlot)) {
     269             :       DEBUG({
     270             :         dbgs() << "Couldn't pack :\n";
     271             :         MI.dump();
     272             :         dbgs() << "with the following packets :\n";
     273             :         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
     274             :           CurrentPacketMIs[i]->dump();
     275             :           dbgs() << "\n";
     276             :         }
     277             :         dbgs() << "because of Read port limitations\n";
     278             :       });
     279         513 :       CurrentPacketMIs.pop_back();
     280         513 :       return false;
     281             :     }
     282             : 
     283             :     // We cannot read LDS source registers from the Trans slot.
     284       40915 :     if (isTransSlot && TII->readsLDSSrcReg(MI))
     285             :       return false;
     286             : 
     287       40915 :     CurrentPacketMIs.pop_back();
     288       40915 :     return true;
     289             :   }
     290             : 
     291       44424 :   MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
     292             :     MachineBasicBlock::iterator FirstInBundle =
     293      159646 :         CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
     294             :     const DenseMap<unsigned, unsigned> &PV =
     295       88848 :         getPreviousVector(FirstInBundle);
     296       88848 :     std::vector<R600InstrInfo::BankSwizzle> BS;
     297             :     bool isTransSlot;
     298             : 
     299       44424 :     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
     300      120927 :       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
     301       78194 :         MachineInstr *MI = CurrentPacketMIs[i];
     302       78194 :         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
     303       39097 :             AMDGPU::OpName::bank_swizzle);
     304      156388 :         MI->getOperand(Op).setImm(BS[i]);
     305             :       }
     306             :       unsigned Op =
     307       81830 :           TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::bank_swizzle);
     308      163660 :       MI.getOperand(Op).setImm(BS.back());
     309       81830 :       if (!CurrentPacketMIs.empty())
     310       45730 :         setIsLastBit(CurrentPacketMIs.back(), 0);
     311       40915 :       substitutePV(MI, PV);
     312       40915 :       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
     313       40915 :       if (isTransSlot) {
     314       26742 :         endPacket(std::next(It)->getParent(), std::next(It));
     315             :       }
     316       40915 :       return It;
     317             :     }
     318        7018 :     endPacket(MI.getParent(), MI);
     319        3509 :     if (TII->isTransOnly(MI))
     320          10 :       return MI;
     321        3499 :     return VLIWPacketizerList::addToPacket(MI);
     322             :   }
     323             : };
     324             : 
     325        2057 : bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
     326        2057 :   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
     327        2057 :   const R600InstrInfo *TII = ST.getInstrInfo();
     328             : 
     329        2057 :   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
     330             : 
     331             :   // Instantiate the packetizer.
     332        4114 :   R600PacketizerList Packetizer(Fn, ST, MLI);
     333             : 
     334             :   // DFA state table should not be empty.
     335             :   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
     336             : 
     337        2057 :   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
     338             :     return false;
     339             : 
     340             :   //
     341             :   // Loop over all basic blocks and remove KILL pseudo-instructions
     342             :   // These instructions confuse the dependence analysis. Consider:
     343             :   // D0 = ...   (Insn 0)
     344             :   // R0 = KILL R0, D0 (Insn 1)
     345             :   // R0 = ... (Insn 2)
     346             :   // Here, Insn 1 will result in the dependence graph not emitting an output
     347             :   // dependence between Insn 0 and Insn 2. This can lead to incorrect
     348             :   // packetization
     349             :   //
     350        4112 :   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
     351        4112 :        MBB != MBBe; ++MBB) {
     352        4112 :     MachineBasicBlock::iterator End = MBB->end();
     353        4112 :     MachineBasicBlock::iterator MI = MBB->begin();
     354       60486 :     while (MI != End) {
     355      292133 :       if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
     356       61861 :           (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
     357          17 :         MachineBasicBlock::iterator DeleteMI = MI;
     358          17 :         ++MI;
     359          17 :         MBB->erase(DeleteMI);
     360          34 :         End = MBB->end();
     361          17 :         continue;
     362             :       }
     363             :       ++MI;
     364             :     }
     365             :   }
     366             : 
     367             :   // Loop over all of the basic blocks.
     368        4112 :   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
     369        4112 :        MBB != MBBe; ++MBB) {
     370             :     // Find scheduling regions and schedule / packetize each region.
     371        2056 :     unsigned RemainingCount = MBB->size();
     372             :     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
     373       12330 :         RegionEnd != MBB->begin();) {
     374             :       // The next region starts above the previous region. Look backward in the
     375             :       // instruction stream until we find the nearest boundary.
     376        2054 :       MachineBasicBlock::iterator I = RegionEnd;
     377        6162 :       for(;I != MBB->begin(); --I, --RemainingCount) {
     378        8216 :         if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
     379             :           break;
     380             :       }
     381        4108 :       I = MBB->begin();
     382             : 
     383             :       // Skip empty scheduling regions.
     384        2054 :       if (I == RegionEnd) {
     385           0 :         RegionEnd = std::prev(RegionEnd);
     386           0 :         --RemainingCount;
     387          19 :         continue;
     388             :       }
     389             :       // Skip regions with one instruction.
     390        4127 :       if (I == std::prev(RegionEnd)) {
     391          19 :         RegionEnd = std::prev(RegionEnd);
     392          19 :         continue;
     393             :       }
     394             : 
     395        6105 :       Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
     396        2035 :       RegionEnd = I;
     397             :     }
     398             :   }
     399             : 
     400             :   return true;
     401             : 
     402             : }
     403             : 
     404             : } // end anonymous namespace
     405             : 
     406       53042 : INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
     407             :                      "R600 Packetizer", false, false)
     408      312538 : INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
     409             :                     "R600 Packetizer", false, false)
     410             : 
     411             : char R600Packetizer::ID = 0;
     412             : 
     413             : char &llvm::R600PacketizerID = R600Packetizer::ID;
     414             : 
     415         244 : llvm::FunctionPass *llvm::createR600Packetizer() {
     416         488 :   return new R600Packetizer();
     417             : }

Generated by: LCOV version 1.13