LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600Packetizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 130 148 87.8 %
Date: 2018-10-20 13:21:21 Functions: 14 17 82.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass implements instruction packetization for R600. It unsets the
      12             : /// isLast bit of instructions inside a bundle and substitutes src registers
      13             : /// with PreviousVector when applicable.
      14             : //
      15             : //===----------------------------------------------------------------------===//
      16             : 
      17             : #include "AMDGPU.h"
      18             : #include "AMDGPUSubtarget.h"
      19             : #include "R600InstrInfo.h"
      20             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      21             : #include "llvm/CodeGen/DFAPacketizer.h"
      22             : #include "llvm/CodeGen/MachineDominators.h"
      23             : #include "llvm/CodeGen/MachineFunctionPass.h"
      24             : #include "llvm/CodeGen/MachineLoopInfo.h"
      25             : #include "llvm/CodeGen/Passes.h"
      26             : #include "llvm/CodeGen/ScheduleDAG.h"
      27             : #include "llvm/Support/Debug.h"
      28             : #include "llvm/Support/raw_ostream.h"
      29             : 
      30             : using namespace llvm;
      31             : 
      32             : #define DEBUG_TYPE "packets"
      33             : 
      34             : namespace {
      35             : 
      36             : class R600Packetizer : public MachineFunctionPass {
      37             : 
      38             : public:
      39             :   static char ID;
      40         282 :   R600Packetizer() : MachineFunctionPass(ID) {}
      41             : 
      42         282 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      43         282 :     AU.setPreservesCFG();
      44             :     AU.addRequired<MachineDominatorTree>();
      45             :     AU.addPreserved<MachineDominatorTree>();
      46             :     AU.addRequired<MachineLoopInfo>();
      47             :     AU.addPreserved<MachineLoopInfo>();
      48         282 :     MachineFunctionPass::getAnalysisUsage(AU);
      49         282 :   }
      50             : 
      51         282 :   StringRef getPassName() const override { return "R600 Packetizer"; }
      52             : 
      53             :   bool runOnMachineFunction(MachineFunction &Fn) override;
      54             : };
      55             : 
      56        2297 : class R600PacketizerList : public VLIWPacketizerList {
      57             : private:
      58             :   const R600InstrInfo *TII;
      59             :   const R600RegisterInfo &TRI;
      60             :   bool VLIW5;
      61             :   bool ConsideredInstUsesAlreadyWrittenVectorElement;
      62             : 
      63           0 :   unsigned getSlot(const MachineInstr &MI) const {
      64           0 :     return TRI.getHWRegChan(MI.getOperand(0).getReg());
      65             :   }
      66             : 
      67             :   /// \returns the register to PV chan mapping for the bundle/single
      68             :   /// instruction that immediately precedes I.
      69       46396 :   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
      70             :       const {
      71             :     DenseMap<unsigned, unsigned> Result;
      72             :     I--;
      73      139188 :     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
      74             :       return Result;
      75             :     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
      76       40574 :     if (I->isBundle())
      77             :       BI++;
      78             :     int LastDstChan = -1;
      79             :     do {
      80             :       bool isTrans = false;
      81       99365 :       int BISlot = getSlot(*BI);
      82       99365 :       if (LastDstChan >= BISlot)
      83             :         isTrans = true;
      84             :       LastDstChan = BISlot;
      85       99365 :       if (TII->isPredicated(*BI))
      86       25424 :         continue;
      87      197846 :       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
      88       98923 :       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
      89             :         continue;
      90      196646 :       int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
      91       98323 :       if (DstIdx == -1) {
      92             :         continue;
      93             :       }
      94       94141 :       unsigned Dst = BI->getOperand(DstIdx).getReg();
      95       94141 :       if (isTrans || TII->isTransOnly(*BI)) {
      96       18516 :         Result[Dst] = R600::PS;
      97       18516 :         continue;
      98             :       }
      99      151250 :       if (BI->getOpcode() == R600::DOT4_r600 ||
     100             :           BI->getOpcode() == R600::DOT4_eg) {
     101          24 :         Result[Dst] = R600::PV_X;
     102          24 :         continue;
     103             :       }
     104       75601 :       if (Dst == R600::OQAP) {
     105             :         continue;
     106             :       }
     107             :       unsigned PVReg = 0;
     108       73941 :       switch (TRI.getHWRegChan(Dst)) {
     109             :       case 0:
     110             :         PVReg = R600::PV_X;
     111             :         break;
     112             :       case 1:
     113             :         PVReg = R600::PV_Y;
     114             :         break;
     115             :       case 2:
     116             :         PVReg = R600::PV_Z;
     117             :         break;
     118             :       case 3:
     119             :         PVReg = R600::PV_W;
     120             :         break;
     121           0 :       default:
     122           0 :         llvm_unreachable("Invalid Chan");
     123             :       }
     124       73941 :       Result[Dst] = PVReg;
     125       99365 :     } while ((++BI)->isBundledWithPred());
     126             :     return Result;
     127             :   }
     128             : 
     129           0 :   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
     130             :       const {
     131           0 :     unsigned Ops[] = {
     132             :       R600::OpName::src0,
     133             :       R600::OpName::src1,
     134             :       R600::OpName::src2
     135             :     };
     136           0 :     for (unsigned i = 0; i < 3; i++) {
     137           0 :       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
     138           0 :       if (OperandIdx < 0)
     139           0 :         continue;
     140           0 :       unsigned Src = MI.getOperand(OperandIdx).getReg();
     141           0 :       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
     142           0 :       if (It != PVs.end())
     143           0 :         MI.getOperand(OperandIdx).setReg(It->second);
     144             :     }
     145           0 :   }
     146             : public:
     147             :   // Ctor.
     148             :   R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
     149             :                      MachineLoopInfo &MLI)
     150        2297 :       : VLIWPacketizerList(MF, MLI, nullptr),
     151             :         TII(ST.getInstrInfo()),
     152        2297 :         TRI(TII->getRegisterInfo()) {
     153        2297 :     VLIW5 = !ST.hasCaymanISA();
     154             :   }
     155             : 
     156             :   // initPacketizerState - initialize some internal flags.
     157       61510 :   void initPacketizerState() override {
     158       61510 :     ConsideredInstUsesAlreadyWrittenVectorElement = false;
     159       61510 :   }
     160             : 
     161             :   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
     162       46396 :   bool ignorePseudoInstruction(const MachineInstr &MI,
     163             :                                const MachineBasicBlock *MBB) override {
     164       46396 :     return false;
     165             :   }
     166             : 
     167             :   // isSoloInstruction - return true if instruction MI cannot be packetized
     168             :   // with any other instruction, which means that MI itself is a packet.
     169       61510 :   bool isSoloInstruction(const MachineInstr &MI) override {
     170       61510 :     if (TII->isVector(MI))
     171             :       return true;
     172      123020 :     if (!TII->isALUInstr(MI.getOpcode()))
     173             :       return true;
     174       99746 :     if (MI.getOpcode() == R600::GROUP_BARRIER)
     175             :       return true;
     176             :     // XXX: This can be removed once the packetizer properly handles all the
     177             :     // LDS instruction group restrictions.
     178       49869 :     return TII->isLDSInstr(MI.getOpcode());
     179             :   }
     180             : 
     181             :   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
     182             :   // together.
     183       53553 :   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
     184       53553 :     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
     185       53553 :     if (getSlot(*MII) == getSlot(*MIJ))
     186       12006 :       ConsideredInstUsesAlreadyWrittenVectorElement = true;
     187             :     // Do MII and MIJ share the same pred_sel?
     188      107106 :     int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
     189      107106 :         OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
     190       53553 :     unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
     191       53553 :         PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
     192       53553 :     if (PredI != PredJ)
     193             :       return false;
     194       53517 :     if (SUJ->isSucc(SUI)) {
     195       37977 :       for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
     196       32176 :         const SDep &Dep = SUJ->Succs[i];
     197       32176 :         if (Dep.getSUnit() != SUI)
     198             :           continue;
     199       11214 :         if (Dep.getKind() == SDep::Anti)
     200             :           continue;
     201        5415 :         if (Dep.getKind() == SDep::Output)
     202         856 :           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
     203             :             continue;
     204             :         return false;
     205             :       }
     206             :     }
     207             : 
     208             :     bool ARDef =
     209       48110 :         TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
     210             :     bool ARUse =
     211       48110 :         TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
     212             : 
     213       48110 :     return !ARDef || !ARUse;
     214             :   }
     215             : 
     216             :   // isLegalToPruneDependencies - Is it legal to prune the dependence between
     217             :   // SUI and SUJ.
     218        5580 :   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
     219        5580 :     return false;
     220             :   }
     221             : 
     222           0 :   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
     223       23509 :     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
     224       23509 :     MI->getOperand(LastOp).setImm(Bit);
     225           0 :   }
     226             : 
     227       46396 :   bool isBundlableWithCurrentPMI(MachineInstr &MI,
     228             :                                  const DenseMap<unsigned, unsigned> &PV,
     229             :                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
     230             :                                  bool &isTransSlot) {
     231       46396 :     isTransSlot = TII->isTransOnly(MI);
     232             :     assert (!isTransSlot || VLIW5);
     233             : 
     234             :     // Is the dst reg sequence legal ?
     235       46396 :     if (!isTransSlot && !CurrentPacketMIs.empty()) {
     236       26774 :       if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
     237        9383 :         if (ConsideredInstUsesAlreadyWrittenVectorElement &&
     238       20581 :             !TII->isVectorOnly(MI) && VLIW5) {
     239        8650 :           isTransSlot = true;
     240             :           LLVM_DEBUG({
     241             :             dbgs() << "Considering as Trans Inst :";
     242             :             MI.dump();
     243             :           });
     244             :         }
     245             :         else
     246        2857 :           return false;
     247             :       }
     248             :     }
     249             : 
     250             :     // Are the Constants limitations met ?
     251       43539 :     CurrentPacketMIs.push_back(&MI);
     252       43539 :     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
     253             :       LLVM_DEBUG({
     254             :         dbgs() << "Couldn't pack :\n";
     255             :         MI.dump();
     256             :         dbgs() << "with the following packets :\n";
     257             :         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
     258             :           CurrentPacketMIs[i]->dump();
     259             :           dbgs() << "\n";
     260             :         }
     261             :         dbgs() << "because of Consts read limitations\n";
     262             :       });
     263             :       CurrentPacketMIs.pop_back();
     264         346 :       return false;
     265             :     }
     266             : 
     267             :     // Is there a BankSwizzle set that meets the Read Port limitations?
     268       43193 :     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
     269       43193 :             PV, BS, isTransSlot)) {
     270             :       LLVM_DEBUG({
     271             :         dbgs() << "Couldn't pack :\n";
     272             :         MI.dump();
     273             :         dbgs() << "with the following packets :\n";
     274             :         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
     275             :           CurrentPacketMIs[i]->dump();
     276             :           dbgs() << "\n";
     277             :         }
     278             :         dbgs() << "because of Read port limitations\n";
     279             :       });
     280             :       CurrentPacketMIs.pop_back();
     281         521 :       return false;
     282             :     }
     283             : 
     284             :     // We cannot read LDS source registers from the Trans slot.
     285       42672 :     if (isTransSlot && TII->readsLDSSrcReg(MI))
     286             :       return false;
     287             : 
     288             :     CurrentPacketMIs.pop_back();
     289       42672 :     return true;
     290             :   }
     291             : 
     292       46396 :   MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
     293             :     MachineBasicBlock::iterator FirstInBundle =
     294       46396 :         CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
     295             :     const DenseMap<unsigned, unsigned> &PV =
     296       46396 :         getPreviousVector(FirstInBundle);
     297             :     std::vector<R600InstrInfo::BankSwizzle> BS;
     298             :     bool isTransSlot;
     299             : 
     300       46396 :     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
     301      125276 :       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
     302       39932 :         MachineInstr *MI = CurrentPacketMIs[i];
     303       79864 :         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
     304       39932 :             R600::OpName::bank_swizzle);
     305      119796 :         MI->getOperand(Op).setImm(BS[i]);
     306             :       }
     307             :       unsigned Op =
     308       85344 :           TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
     309       85344 :       MI.getOperand(Op).setImm(BS.back());
     310       42672 :       if (!CurrentPacketMIs.empty())
     311       23509 :         setIsLastBit(CurrentPacketMIs.back(), 0);
     312       42672 :       substitutePV(MI, PV);
     313       42672 :       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
     314       42672 :       if (isTransSlot) {
     315        9235 :         endPacket(std::next(It)->getParent(), std::next(It));
     316             :       }
     317       42672 :       return It;
     318             :     }
     319        7448 :     endPacket(MI.getParent(), MI);
     320        3724 :     if (TII->isTransOnly(MI))
     321          11 :       return MI;
     322        3713 :     return VLIWPacketizerList::addToPacket(MI);
     323             :   }
     324             : };
     325             : 
     326        2297 : bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
     327        2297 :   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
     328             :   const R600InstrInfo *TII = ST.getInstrInfo();
     329             : 
     330        2297 :   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
     331             : 
     332             :   // Instantiate the packetizer.
     333             :   R600PacketizerList Packetizer(Fn, ST, MLI);
     334             : 
     335             :   // DFA state table should not be empty.
     336             :   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
     337             :   assert(Packetizer.getResourceTracker()->getInstrItins());
     338             : 
     339        2297 :   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
     340             :     return false;
     341             : 
     342             :   //
     343             :   // Loop over all basic blocks and remove KILL pseudo-instructions.
     344             :   // These instructions confuse the dependence analysis. Consider:
     345             :   // D0 = ...   (Insn 0)
     346             :   // R0 = KILL R0, D0 (Insn 1)
     347             :   // R0 = ... (Insn 2)
     348             :   // Here, Insn 1 will result in the dependence graph not emitting an output
     349             :   // dependence between Insn 0 and Insn 2. This can lead to incorrect
     350             :   // packetization.
     351             :   //
     352             :   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
     353        4592 :        MBB != MBBe; ++MBB) {
     354             :     MachineBasicBlock::iterator End = MBB->end();
     355             :     MachineBasicBlock::iterator MI = MBB->begin();
     356       63914 :     while (MI != End) {
     357       61618 :       if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
     358        3781 :           (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
     359          17 :         MachineBasicBlock::iterator DeleteMI = MI;
     360             :         ++MI;
     361          17 :         MBB->erase(DeleteMI);
     362             :         End = MBB->end();
     363             :         continue;
     364             :       }
     365             :       ++MI;
     366             :     }
     367             :   }
     368             : 
     369             :   // Loop over all of the basic blocks.
     370             :   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
     371        4592 :        MBB != MBBe; ++MBB) {
     372             :     // Find scheduling regions and schedule / packetize each region.
     373             :     unsigned RemainingCount = MBB->size();
     374             :     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
     375        4590 :         RegionEnd != MBB->begin();) {
     376             :       // The next region starts above the previous region. Look backward in the
     377             :       // instruction stream until we find the nearest boundary.
     378        2294 :       MachineBasicBlock::iterator I = RegionEnd;
     379        2294 :       for(;I != MBB->begin(); --I, --RemainingCount) {
     380        2294 :         if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
     381             :           break;
     382             :       }
     383        2294 :       I = MBB->begin();
     384             : 
     385             :       // Skip empty scheduling regions.
     386        2294 :       if (I == RegionEnd) {
     387           0 :         RegionEnd = std::prev(RegionEnd);
     388             :         --RemainingCount;
     389          91 :         continue;
     390             :       }
     391             :       // Skip regions with one instruction.
     392        4588 :       if (I == std::prev(RegionEnd)) {
     393          91 :         RegionEnd = std::prev(RegionEnd);
     394          91 :         continue;
     395             :       }
     396             : 
     397        2203 :       Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
     398        2203 :       RegionEnd = I;
     399             :     }
     400             :   }
     401             : 
     402             :   return true;
     403             : 
     404             : }
     405             : 
     406             : } // end anonymous namespace
     407             : 
     408       85105 : INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
     409             :                      "R600 Packetizer", false, false)
     410      199024 : INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
     411             :                     "R600 Packetizer", false, false)
     412             : 
     413             : char R600Packetizer::ID = 0;
     414             : 
     415             : char &llvm::R600PacketizerID = R600Packetizer::ID;
     416             : 
     417         282 : llvm::FunctionPass *llvm::createR600Packetizer() {
     418         282 :   return new R600Packetizer();
     419             : }

Generated by: LCOV version 1.13