LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - R600ControlFlowFinalizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 212 311 68.2 %
Date: 2018-10-20 13:21:21 Functions: 11 20 55.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
       11             : /// This pass turns all control flow pseudo instructions into native ones,
       12             : /// computing their addresses on the fly; it also sets STACK_SIZE info.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AMDGPU.h"
      17             : #include "AMDGPUSubtarget.h"
      18             : #include "R600Defines.h"
      19             : #include "R600InstrInfo.h"
      20             : #include "R600MachineFunctionInfo.h"
      21             : #include "R600RegisterInfo.h"
      22             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      23             : #include "llvm/ADT/STLExtras.h"
      24             : #include "llvm/ADT/SmallVector.h"
      25             : #include "llvm/ADT/StringRef.h"
      26             : #include "llvm/CodeGen/MachineBasicBlock.h"
      27             : #include "llvm/CodeGen/MachineFunction.h"
      28             : #include "llvm/CodeGen/MachineFunctionPass.h"
      29             : #include "llvm/CodeGen/MachineInstr.h"
      30             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      31             : #include "llvm/CodeGen/MachineOperand.h"
      32             : #include "llvm/IR/CallingConv.h"
      33             : #include "llvm/IR/DebugLoc.h"
      34             : #include "llvm/IR/Function.h"
      35             : #include "llvm/Pass.h"
      36             : #include "llvm/Support/Compiler.h"
      37             : #include "llvm/Support/Debug.h"
      38             : #include "llvm/Support/MathExtras.h"
      39             : #include "llvm/Support/raw_ostream.h"
      40             : #include <algorithm>
      41             : #include <cassert>
      42             : #include <cstdint>
      43             : #include <set>
      44             : #include <utility>
      45             : #include <vector>
      46             : 
      47             : using namespace llvm;
      48             : 
      49             : #define DEBUG_TYPE "r600cf"
      50             : 
      51             : namespace {
      52             : 
      53             : struct CFStack {
      54             :   enum StackItem {
      55             :     ENTRY = 0,
      56             :     SUB_ENTRY = 1,
      57             :     FIRST_NON_WQM_PUSH = 2,
      58             :     FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
      59             :   };
      60             : 
      61             :   const R600Subtarget *ST;
      62             :   std::vector<StackItem> BranchStack;
      63             :   std::vector<StackItem> LoopStack;
      64             :   unsigned MaxStackSize;
      65             :   unsigned CurrentEntries = 0;
      66             :   unsigned CurrentSubEntries = 0;
      67             : 
      68        2297 :   CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
      69             :       // We need to reserve a stack entry for CALL_FS in vertex shaders.
      70        4579 :       MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
      71             : 
      72             :   unsigned getLoopDepth();
      73             :   bool branchStackContains(CFStack::StackItem);
      74             :   bool requiresWorkAroundForInst(unsigned Opcode);
      75             :   unsigned getSubEntrySize(CFStack::StackItem Item);
      76             :   void updateMaxStackSize();
      77             :   void pushBranch(unsigned Opcode, bool isWQM = false);
      78             :   void pushLoop();
      79             :   void popBranch();
      80             :   void popLoop();
      81             : };
      82             : 
      83             : unsigned CFStack::getLoopDepth() {
      84           8 :   return LoopStack.size();
      85             : }
      86             : 
      87             : bool CFStack::branchStackContains(CFStack::StackItem Item) {
      88             :   for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
      89          54 :        E = BranchStack.end(); I != E; ++I) {
      90          10 :     if (*I == Item)
      91             :       return true;
      92             :   }
      93             :   return false;
      94             : }
      95             : 
      96        9393 : bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
      97        9393 :   if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
      98             :       getLoopDepth() > 1)
      99             :     return true;
     100             : 
     101        9392 :   if (!ST->hasCFAluBug())
     102             :     return false;
     103             : 
     104             :   switch(Opcode) {
     105             :   default: return false;
     106          45 :   case R600::CF_ALU_PUSH_BEFORE:
     107             :   case R600::CF_ALU_ELSE_AFTER:
     108             :   case R600::CF_ALU_BREAK:
     109             :   case R600::CF_ALU_CONTINUE:
     110          45 :     if (CurrentSubEntries == 0)
     111             :       return false;
     112          10 :     if (ST->getWavefrontSize() == 64) {
     113             :       // We are being conservative here.  We only require this work-around if
     114             :       // CurrentSubEntries > 3 &&
     115             :       // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
     116             :       //
     117             :       // We have to be conservative, because we don't know for certain that
     118             :       // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
     119             :       // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
     120             :       // resources without any problems.
     121           8 :       return CurrentSubEntries > 3;
     122             :     } else {
     123             :       assert(ST->getWavefrontSize() == 32);
     124             :       // We are being conservative here.  We only require the work-around if
     125             :       // CurrentSubEntries > 7 &&
     126             :       // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
     127             :       // See the comment on the wavefront size == 64 case for why we are
     128             :       // being conservative.
     129           2 :       return CurrentSubEntries > 7;
     130             :     }
     131             :   }
     132             : }
     133             : 
     134           0 : unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
     135         124 :   switch(Item) {
     136             :   default:
     137             :     return 0;
     138          88 :   case CFStack::FIRST_NON_WQM_PUSH:
     139             :   assert(!ST->hasCaymanISA());
     140          88 :   if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
     141             :     // +1 For the push operation.
     142             :     // +2 Extra space required.
     143           0 :     return 3;
     144             :   } else {
     145             :     // Some documentation says that this is not necessary on Evergreen,
      146             :     // but experimentation has shown that we need to allocate 1 extra
     147             :     // sub-entry for the first non-WQM push.
     148             :     // +1 For the push operation.
     149             :     // +1 Extra space required.
     150           0 :     return 2;
     151             :   }
     152           0 :   case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
     153             :     assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
     154             :     // +1 For the push operation.
     155             :     // +1 Extra space required.
     156           0 :     return 2;
     157          36 :   case CFStack::SUB_ENTRY:
     158           0 :     return 1;
     159             :   }
     160             : }
     161             : 
     162             : void CFStack::updateMaxStackSize() {
     163             :   unsigned CurrentStackSize =
     164          62 :       CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
     165          80 :   MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
     166             : }
     167             : 
     168          62 : void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
     169          62 :   CFStack::StackItem Item = CFStack::ENTRY;
     170          62 :   switch(Opcode) {
     171          62 :   case R600::CF_PUSH_EG:
     172             :   case R600::CF_ALU_PUSH_BEFORE:
     173          62 :     if (!isWQM) {
     174         116 :       if (!ST->hasCaymanISA() &&
     175             :           !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
     176          44 :         Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
     177             :                                              // See comment in
     178             :                                              // CFStack::getSubEntrySize()
     179          31 :       else if (CurrentEntries > 0 &&
     180          13 :                ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
     181          18 :                !ST->hasCaymanISA() &&
     182             :                !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
     183           0 :         Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
     184             :       else
     185          18 :         Item = CFStack::SUB_ENTRY;
     186             :     } else
     187             :       Item = CFStack::ENTRY;
     188             :     break;
     189             :   }
     190          62 :   BranchStack.push_back(Item);
     191          62 :   if (Item == CFStack::ENTRY)
     192           0 :     CurrentEntries++;
     193             :   else
     194          62 :     CurrentSubEntries += getSubEntrySize(Item);
     195             :   updateMaxStackSize();
     196          62 : }
     197             : 
     198          18 : void CFStack::pushLoop() {
     199          18 :   LoopStack.push_back(CFStack::ENTRY);
     200          18 :   CurrentEntries++;
     201             :   updateMaxStackSize();
     202          18 : }
     203             : 
     204          62 : void CFStack::popBranch() {
     205          62 :   CFStack::StackItem Top = BranchStack.back();
     206          62 :   if (Top == CFStack::ENTRY)
     207           0 :     CurrentEntries--;
     208             :   else
     209          62 :     CurrentSubEntries-= getSubEntrySize(Top);
     210             :   BranchStack.pop_back();
     211          62 : }
     212             : 
     213             : void CFStack::popLoop() {
     214          18 :   CurrentEntries--;
     215             :   LoopStack.pop_back();
     216             : }
     217             : 
     218             : class R600ControlFlowFinalizer : public MachineFunctionPass {
     219             : private:
     220             :   using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
     221             : 
     222             :   enum ControlFlowInstruction {
     223             :     CF_TC,
     224             :     CF_VC,
     225             :     CF_CALL_FS,
     226             :     CF_WHILE_LOOP,
     227             :     CF_END_LOOP,
     228             :     CF_LOOP_BREAK,
     229             :     CF_LOOP_CONTINUE,
     230             :     CF_JUMP,
     231             :     CF_ELSE,
     232             :     CF_POP,
     233             :     CF_END
     234             :   };
     235             : 
     236             :   const R600InstrInfo *TII = nullptr;
     237             :   const R600RegisterInfo *TRI = nullptr;
     238             :   unsigned MaxFetchInst;
     239             :   const R600Subtarget *ST = nullptr;
     240             : 
     241           0 :   bool IsTrivialInst(MachineInstr &MI) const {
     242       67184 :     switch (MI.getOpcode()) {
     243             :     case R600::KILL:
     244             :     case R600::RETURN:
     245             :       return true;
     246           0 :     default:
     247           0 :       return false;
     248             :     }
     249             :   }
     250             : 
     251           0 :   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
     252             :     unsigned Opcode = 0;
     253        2180 :     bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
     254           0 :     switch (CFI) {
     255           0 :     case CF_TC:
     256           0 :       Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
     257             :       break;
     258           0 :     case CF_VC:
     259           0 :       Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
     260             :       break;
     261           0 :     case CF_CALL_FS:
     262           0 :       Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
     263             :       break;
     264           0 :     case CF_WHILE_LOOP:
     265           0 :       Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
     266             :       break;
     267           0 :     case CF_END_LOOP:
     268           0 :       Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
     269             :       break;
     270           0 :     case CF_LOOP_BREAK:
     271           0 :       Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
     272             :       break;
     273           0 :     case CF_LOOP_CONTINUE:
     274           0 :       Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
     275             :       break;
     276           0 :     case CF_JUMP:
     277           0 :       Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
     278             :       break;
     279           0 :     case CF_ELSE:
     280           0 :       Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
     281             :       break;
     282           0 :     case CF_POP:
     283           0 :       Opcode = isEg ? R600::POP_EG : R600::POP_R600;
     284             :       break;
     285           0 :     case CF_END:
     286           0 :       if (ST->hasCaymanISA()) {
     287             :         Opcode = R600::CF_END_CM;
     288             :         break;
     289             :       }
     290        1704 :       Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
     291             :       break;
     292             :     }
     293             :     assert (Opcode && "No opcode selected");
     294        2180 :     return TII->get(Opcode);
     295             :   }
     296             : 
     297           0 :   bool isCompatibleWithClause(const MachineInstr &MI,
     298             :                               std::set<unsigned> &DstRegs) const {
     299             :     unsigned DstMI, SrcMI;
     300           0 :     for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
     301           0 :                                           E = MI.operands_end();
     302           0 :          I != E; ++I) {
     303             :       const MachineOperand &MO = *I;
     304           0 :       if (!MO.isReg())
     305           0 :         continue;
     306           0 :       if (MO.isDef()) {
     307           0 :         unsigned Reg = MO.getReg();
     308           0 :         if (R600::R600_Reg128RegClass.contains(Reg))
     309           0 :           DstMI = Reg;
     310             :         else
     311           0 :           DstMI = TRI->getMatchingSuperReg(Reg,
     312             :               AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
     313             :               &R600::R600_Reg128RegClass);
     314             :       }
     315           0 :       if (MO.isUse()) {
     316           0 :         unsigned Reg = MO.getReg();
     317           0 :         if (R600::R600_Reg128RegClass.contains(Reg))
     318             :           SrcMI = Reg;
     319             :         else
     320           0 :           SrcMI = TRI->getMatchingSuperReg(Reg,
     321             :               AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
     322             :               &R600::R600_Reg128RegClass);
     323             :       }
     324             :     }
     325           0 :     if ((DstRegs.find(SrcMI) == DstRegs.end())) {
     326             :       DstRegs.insert(DstMI);
     327           0 :       return true;
     328             :     } else
     329           0 :       return false;
     330             :   }
     331             : 
     332             :   ClauseFile
     333        1317 :   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
     334             :       const {
     335        1317 :     MachineBasicBlock::iterator ClauseHead = I;
     336             :     std::vector<MachineInstr *> ClauseContent;
     337             :     unsigned AluInstCount = 0;
     338        1317 :     bool IsTex = TII->usesTextureCache(*ClauseHead);
     339             :     std::set<unsigned> DstRegs;
     340        3300 :     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
     341             :       if (IsTrivialInst(*I))
     342             :         continue;
     343        3300 :       if (AluInstCount >= MaxFetchInst)
     344             :         break;
     345        3300 :       if ((IsTex && !TII->usesTextureCache(*I)) ||
     346           0 :           (!IsTex && !TII->usesVertexCache(*I)))
     347             :         break;
     348        2000 :       if (!isCompatibleWithClause(*I, DstRegs))
     349             :         break;
     350        1983 :       AluInstCount ++;
     351        1983 :       ClauseContent.push_back(&*I);
     352             :     }
     353        1317 :     MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
     354        1317 :         getHWInstrDesc(IsTex?CF_TC:CF_VC))
     355             :         .addImm(0) // ADDR
     356        1317 :         .addImm(AluInstCount - 1); // COUNT
     357        1317 :     return ClauseFile(MIb, std::move(ClauseContent));
     358             :   }
     359             : 
     360           0 :   void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
     361             :     static const unsigned LiteralRegs[] = {
     362             :       R600::ALU_LITERAL_X,
     363             :       R600::ALU_LITERAL_Y,
     364             :       R600::ALU_LITERAL_Z,
     365             :       R600::ALU_LITERAL_W
     366             :     };
     367             :     const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
     368           0 :         TII->getSrcs(MI);
     369           0 :     for (const auto &Src:Srcs) {
     370           0 :       if (Src.first->getReg() != R600::ALU_LITERAL_X)
     371           0 :         continue;
     372           0 :       int64_t Imm = Src.second;
     373             :       std::vector<MachineOperand *>::iterator It =
     374             :           llvm::find_if(Lits, [&](MachineOperand *val) {
     375           0 :             return val->isImm() && (val->getImm() == Imm);
     376             :           });
     377             : 
     378             :       // Get corresponding Operand
     379             :       MachineOperand &Operand = MI.getOperand(
     380           0 :           TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
     381             : 
     382           0 :       if (It != Lits.end()) {
     383             :         // Reuse existing literal reg
     384           0 :         unsigned Index = It - Lits.begin();
     385           0 :         Src.first->setReg(LiteralRegs[Index]);
     386             :       } else {
     387             :         // Allocate new literal reg
     388             :         assert(Lits.size() < 4 && "Too many literals in Instruction Group");
     389           0 :         Src.first->setReg(LiteralRegs[Lits.size()]);
     390           0 :         Lits.push_back(&Operand);
     391             :       }
     392             :     }
     393           0 :   }
     394             : 
     395             :   MachineBasicBlock::iterator insertLiterals(
     396             :       MachineBasicBlock::iterator InsertPos,
     397             :       const std::vector<unsigned> &Literals) const {
     398             :     MachineBasicBlock *MBB = InsertPos->getParent();
     399             :     for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
     400             :       unsigned LiteralPair0 = Literals[i];
     401             :       unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
     402             :       InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
     403             :           TII->get(R600::LITERALS))
     404             :           .addImm(LiteralPair0)
     405             :           .addImm(LiteralPair1);
     406             :     }
     407             :     return InsertPos;
     408             :   }
     409             : 
     410             :   ClauseFile
     411        3844 :   MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
     412             :       const {
     413             :     MachineInstr &ClauseHead = *I;
     414             :     std::vector<MachineInstr *> ClauseContent;
     415             :     I++;
     416       30579 :     for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
     417             :       if (IsTrivialInst(*I)) {
     418             :         ++I;
     419         287 :         continue;
     420             :       }
     421       30005 :       if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
     422             :         break;
     423             :       std::vector<MachineOperand *>Literals;
     424       26448 :       if (I->isBundle()) {
     425             :         MachineInstr &DeleteMI = *I;
     426             :         MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
     427       51221 :         while (++BI != E && BI->isBundledWithPred()) {
     428       37488 :           BI->unbundleFromPred();
     429      766052 :           for (MachineOperand &MO : BI->operands()) {
     430      728564 :             if (MO.isReg() && MO.isInternalRead())
     431             :               MO.setIsInternalRead(false);
     432             :           }
     433       37488 :           getLiteral(*BI, Literals);
     434       37488 :           ClauseContent.push_back(&*BI);
     435             :         }
     436       13733 :         I = BI;
     437       13733 :         DeleteMI.eraseFromParent();
     438             :       } else {
     439       12715 :         getLiteral(*I, Literals);
     440       12715 :         ClauseContent.push_back(&*I);
     441             :         I++;
     442             :       }
     443       66697 :       for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
     444             :         MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
     445       27602 :             TII->get(R600::LITERALS));
     446       41403 :         if (Literals[i]->isImm()) {
     447       13786 :             MILit.addImm(Literals[i]->getImm());
     448             :         } else {
     449             :             MILit.addGlobalAddress(Literals[i]->getGlobal(),
     450          30 :                                    Literals[i]->getOffset());
     451             :         }
     452       13801 :         if (i + 1 < e) {
     453       16257 :           if (Literals[i + 1]->isImm()) {
     454        5419 :             MILit.addImm(Literals[i + 1]->getImm());
     455             :           } else {
     456             :             MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
     457           0 :                                    Literals[i + 1]->getOffset());
     458             :           }
     459             :         } else
     460             :           MILit.addImm(0);
     461       13801 :         ClauseContent.push_back(MILit);
     462             :       }
     463             :     }
     464             :     assert(ClauseContent.size() < 128 && "ALU clause is too big");
     465        7688 :     ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
     466        3844 :     return ClauseFile(&ClauseHead, std::move(ClauseContent));
     467             :   }
     468             : 
     469           0 :   void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
     470             :                        const DebugLoc &DL, ClauseFile &Clause,
     471             :                        unsigned &CfCount) {
     472           0 :     CounterPropagateAddr(*Clause.first, CfCount);
     473           0 :     MachineBasicBlock *BB = Clause.first->getParent();
     474           0 :     BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
     475           0 :     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
     476           0 :       BB->splice(InsertPos, BB, Clause.second[i]);
     477             :     }
     478           0 :     CfCount += 2 * Clause.second.size();
     479           0 :   }
     480             : 
     481           0 :   void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
     482             :                      ClauseFile &Clause, unsigned &CfCount) {
     483           0 :     Clause.first->getOperand(0).setImm(0);
     484           0 :     CounterPropagateAddr(*Clause.first, CfCount);
     485           0 :     MachineBasicBlock *BB = Clause.first->getParent();
     486           0 :     BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
     487           0 :     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
     488           0 :       BB->splice(InsertPos, BB, Clause.second[i]);
     489             :     }
     490           0 :     CfCount += Clause.second.size();
     491           0 :   }
     492             : 
     493           0 :   void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
     494           0 :     MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
     495           0 :   }
     496           0 :   void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
     497             :                             unsigned Addr) const {
     498           0 :     for (MachineInstr *MI : MIs) {
     499             :       CounterPropagateAddr(*MI, Addr);
     500             :     }
     501           0 :   }
     502             : 
     503             : public:
     504             :   static char ID;
     505             : 
     506         282 :   R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
     507             : 
     508        2297 :   bool runOnMachineFunction(MachineFunction &MF) override {
     509        2297 :     ST = &MF.getSubtarget<R600Subtarget>();
     510        2297 :     MaxFetchInst = ST->getTexVTXClauseSize();
     511        2297 :     TII = ST->getInstrInfo();
     512        2297 :     TRI = ST->getRegisterInfo();
     513             : 
     514        2297 :     R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
     515             : 
     516        6891 :     CFStack CFStack(ST, MF.getFunction().getCallingConv());
     517        4594 :     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
     518             :         ++MB) {
     519             :       MachineBasicBlock &MBB = *MB;
     520        2297 :       unsigned CfCount = 0;
     521        2297 :       std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
     522             :       std::vector<MachineInstr * > IfThenElseStack;
     523        4594 :       if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
     524          15 :         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
     525          30 :             getHWInstrDesc(CF_CALL_FS));
     526          15 :         CfCount++;
     527             :       }
     528        2297 :       std::vector<ClauseFile> FetchClauses, AluClauses;
     529        2297 :       std::vector<MachineInstr *> LastAlu(1);
     530             :       std::vector<MachineInstr *> ToPopAfter;
     531             : 
     532        2297 :       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
     533       13007 :           I != E;) {
     534       10710 :         if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
     535             :           LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
     536        1317 :           FetchClauses.push_back(MakeFetchClause(MBB, I));
     537        1317 :           CfCount++;
     538        1317 :           LastAlu.back() = nullptr;
     539             :           continue;
     540             :         }
     541             : 
     542        9393 :         MachineBasicBlock::iterator MI = I;
     543       18786 :         if (MI->getOpcode() != R600::ENDIF)
     544        9331 :           LastAlu.back() = nullptr;
     545        9393 :         if (MI->getOpcode() == R600::CF_ALU)
     546        3782 :           LastAlu.back() = &*MI;
     547             :         I++;
     548             :         bool RequiresWorkAround =
     549        9393 :             CFStack.requiresWorkAroundForInst(MI->getOpcode());
     550        9393 :         switch (MI->getOpcode()) {
     551          62 :         case R600::CF_ALU_PUSH_BEFORE:
     552          62 :           if (RequiresWorkAround) {
     553             :             LLVM_DEBUG(dbgs()
     554             :                        << "Applying bug work-around for ALU_PUSH_BEFORE\n");
     555           1 :             BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
     556           1 :                 .addImm(CfCount + 1)
     557             :                 .addImm(1);
     558           1 :             MI->setDesc(TII->get(R600::CF_ALU));
     559           1 :             CfCount++;
     560           1 :             CFStack.pushBranch(R600::CF_PUSH_EG);
     561             :           } else
     562          61 :             CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
     563             :           LLVM_FALLTHROUGH;
     564             :         case R600::CF_ALU:
     565        3844 :           I = MI;
     566        3844 :           AluClauses.push_back(MakeALUClause(MBB, I));
     567             :           LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
     568        3844 :           CfCount++;
     569        3844 :           break;
     570          18 :         case R600::WHILELOOP: {
     571          18 :           CFStack.pushLoop();
     572          18 :           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
     573          36 :               getHWInstrDesc(CF_WHILE_LOOP))
     574          18 :               .addImm(1);
     575             :           std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
     576          18 :               std::set<MachineInstr *>());
     577             :           Pair.second.insert(MIb);
     578             :           LoopStack.push_back(std::move(Pair));
     579          18 :           MI->eraseFromParent();
     580          18 :           CfCount++;
     581             :           break;
     582             :         }
     583             :         case R600::ENDLOOP: {
     584             :           CFStack.popLoop();
     585             :           std::pair<unsigned, std::set<MachineInstr *>> Pair =
     586             :               std::move(LoopStack.back());
     587             :           LoopStack.pop_back();
     588          18 :           CounterPropagateAddr(Pair.second, CfCount);
     589          54 :           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
     590          18 :               .addImm(Pair.first + 1);
     591          18 :           MI->eraseFromParent();
     592          18 :           CfCount++;
     593             :           break;
     594             :         }
     595          62 :         case R600::IF_PREDICATE_SET: {
     596          62 :           LastAlu.push_back(nullptr);
     597          62 :           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
     598         124 :               getHWInstrDesc(CF_JUMP))
     599             :               .addImm(0)
     600          62 :               .addImm(0);
     601          62 :           IfThenElseStack.push_back(MIb);
     602             :           LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
     603          62 :           MI->eraseFromParent();
     604          62 :           CfCount++;
     605             :           break;
     606             :         }
     607             :         case R600::ELSE: {
     608           3 :           MachineInstr * JumpInst = IfThenElseStack.back();
     609             :           IfThenElseStack.pop_back();
     610           3 :           CounterPropagateAddr(*JumpInst, CfCount);
     611           3 :           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
     612           6 :               getHWInstrDesc(CF_ELSE))
     613             :               .addImm(0)
     614           3 :               .addImm(0);
     615             :           LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
     616           3 :           IfThenElseStack.push_back(MIb);
     617           3 :           MI->eraseFromParent();
     618           3 :           CfCount++;
     619             :           break;
     620             :         }
     621          62 :         case R600::ENDIF: {
     622          62 :           CFStack.popBranch();
     623          62 :           if (LastAlu.back()) {
     624          24 :             ToPopAfter.push_back(LastAlu.back());
     625             :           } else {
     626          38 :             MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
     627          76 :                 getHWInstrDesc(CF_POP))
     628          38 :                 .addImm(CfCount + 1)
     629             :                 .addImm(1);
     630             :             (void)MIb;
     631             :             LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
     632          38 :             CfCount++;
     633             :           }
     634             : 
     635          62 :           MachineInstr *IfOrElseInst = IfThenElseStack.back();
     636             :           IfThenElseStack.pop_back();
     637          62 :           CounterPropagateAddr(*IfOrElseInst, CfCount);
     638             :           IfOrElseInst->getOperand(1).setImm(1);
     639             :           LastAlu.pop_back();
     640          62 :           MI->eraseFromParent();
     641          62 :           break;
     642             :         }
     643          18 :         case R600::BREAK: {
     644          18 :           CfCount ++;
     645          18 :           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
     646          36 :               getHWInstrDesc(CF_LOOP_BREAK))
     647          18 :               .addImm(0);
     648             :           LoopStack.back().second.insert(MIb);
     649          18 :           MI->eraseFromParent();
     650             :           break;
     651             :         }
     652           0 :         case R600::CONTINUE: {
     653           0 :           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
     654           0 :               getHWInstrDesc(CF_LOOP_CONTINUE))
     655           0 :               .addImm(0);
     656             :           LoopStack.back().second.insert(MIb);
     657           0 :           MI->eraseFromParent();
     658           0 :           CfCount++;
     659             :           break;
     660             :         }
     661        2008 :         case R600::RETURN: {
     662             :           DebugLoc DL = MBB.findDebugLoc(MI);
     663        3712 :           BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
     664        2008 :           CfCount++;
     665        2008 :           if (CfCount % 2) {
     666        3314 :             BuildMI(MBB, I, DL, TII->get(R600::PAD));
     667        1657 :             CfCount++;
     668             :           }
     669        2008 :           MI->eraseFromParent();
     670        5312 :           for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
     671        2592 :             EmitFetchClause(I, DL, FetchClauses[i], CfCount);
     672        7473 :           for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
     673        6914 :             EmitALUClause(I, DL, AluClauses[i], CfCount);
     674             :           break;
     675             :         }
     676        3360 :         default:
     677        3360 :           if (TII->isExport(MI->getOpcode())) {
     678             :             LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
     679        3343 :             CfCount++;
     680             :           }
     681             :           break;
     682             :         }
     683             :       }
     684        4618 :       for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
     685          24 :         MachineInstr *Alu = ToPopAfter[i];
     686          24 :         BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
     687          24 :             TII->get(R600::CF_ALU_POP_AFTER))
     688          24 :             .addImm(Alu->getOperand(0).getImm())
     689          24 :             .addImm(Alu->getOperand(1).getImm())
     690          24 :             .addImm(Alu->getOperand(2).getImm())
     691          24 :             .addImm(Alu->getOperand(3).getImm())
     692          24 :             .addImm(Alu->getOperand(4).getImm())
     693          24 :             .addImm(Alu->getOperand(5).getImm())
     694          24 :             .addImm(Alu->getOperand(6).getImm())
     695          24 :             .addImm(Alu->getOperand(7).getImm())
     696          24 :             .addImm(Alu->getOperand(8).getImm());
     697          24 :         Alu->eraseFromParent();
     698             :       }
     699        2297 :       MFI->CFStackSize = CFStack.MaxStackSize;
     700             :     }
     701             : 
     702        2297 :     return false;
     703             :   }
     704             : 
     705         282 :   StringRef getPassName() const override { // Overrides the base-class hook that names this pass.
     706         282 :     return "R600 Control Flow Finalizer Pass"; // Fixed string literal; no pass state is read.
     707             :   }
     708             : };
     709             : 
     710             : } // end anonymous namespace
     711             : 
     712       85105 : INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, // BEGIN/END pair for this pass; both take the same class, DEBUG_TYPE tag and description.
     713             :                      "R600 Control Flow Finalizer", false, false)
     714      199024 : INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, // No statements sit between BEGIN and END here.
     715             :                     "R600 Control Flow Finalizer", false, false) // NOTE(review): the two flags are presumably (CFG-only, is-analysis) per LLVM PassSupport.h - confirm.
     716             : 
     717             : char R600ControlFlowFinalizer::ID = 0; // Out-of-class definition of the pass's static ID member, initialised to 0.
     718             : 
     719             : char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; // llvm-namespace reference bound to ID; the class itself sits in an anonymous namespace.
     720             : 
     721         282 : FunctionPass *llvm::createR600ControlFlowFinalizer() { // Factory: builds the pass defined in this file and returns it as a FunctionPass pointer.
     722         282 :   return new R600ControlFlowFinalizer(); // Heap-allocated; NOTE(review): ownership presumably passes to the caller / pass manager - confirm.
     723             : }

Generated by: LCOV version 1.13