LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDILCFGStructurizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 478 581 82.3 %
Date: 2017-09-14 15:23:50 Functions: 52 54 96.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //==-----------------------------------------------------------------------===//
       9             : 
      10             : #include "AMDGPU.h"
      11             : #include "AMDGPUSubtarget.h"
      12             : #include "R600InstrInfo.h"
      13             : #include "R600RegisterInfo.h"
      14             : #include "llvm/ADT/DepthFirstIterator.h"
      15             : #include "llvm/ADT/SCCIterator.h"
      16             : #include "llvm/ADT/SmallPtrSet.h"
      17             : #include "llvm/ADT/SmallVector.h"
      18             : #include "llvm/ADT/Statistic.h"
      19             : #include "llvm/ADT/StringRef.h"
      20             : #include "llvm/CodeGen/MachineBasicBlock.h"
      21             : #include "llvm/CodeGen/MachineDominators.h"
      22             : #include "llvm/CodeGen/MachineFunction.h"
      23             : #include "llvm/CodeGen/MachineFunctionPass.h"
      24             : #include "llvm/CodeGen/MachineInstr.h"
      25             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      26             : #include "llvm/CodeGen/MachineJumpTableInfo.h"
      27             : #include "llvm/CodeGen/MachineLoopInfo.h"
      28             : #include "llvm/CodeGen/MachineOperand.h"
      29             : #include "llvm/CodeGen/MachinePostDominators.h"
      30             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      31             : #include "llvm/CodeGen/MachineValueType.h"
      32             : #include "llvm/IR/DebugLoc.h"
      33             : #include "llvm/IR/LLVMContext.h"
      34             : #include "llvm/Pass.h"
      35             : #include "llvm/Support/Debug.h"
      36             : #include "llvm/Support/ErrorHandling.h"
      37             : #include "llvm/Support/raw_ostream.h"
      38             : #include <cassert>
      39             : #include <cstddef>
      40             : #include <deque>
      41             : #include <iterator>
      42             : #include <map>
      43             : #include <utility>
      44             : #include <vector>
      45             : 
      46             : using namespace llvm;
      47             : 
      48             : #define DEBUG_TYPE "structcfg"
      49             : 
      50             : #define DEFAULT_VEC_SLOTS 8
      51             : 
      52             : // TODO: move-begin.
      53             : 
      54             : //===----------------------------------------------------------------------===//
      55             : //
      56             : // Statistics for CFGStructurizer.
      57             : //
      58             : //===----------------------------------------------------------------------===//
      59             : 
      60             : STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
      61             :     "matched");
      62             : STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
      63             :     "matched");
      64             : STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
      65             : STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
      66             : 
      67             : namespace llvm {
      68             : 
      69             : void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
      70             : 
      71             : } // end namespace llvm
      72             : 
      73             : namespace {
      74             : 
      75             : //===----------------------------------------------------------------------===//
      76             : //
      77             : // Miscellaneous utility for CFGStructurizer.
      78             : //
      79             : //===----------------------------------------------------------------------===//
      80             : 
      81             : #define SHOWNEWINSTR(i) \
      82             :   DEBUG(dbgs() << "New instr: " << *i << "\n");
      83             : 
      84             : #define SHOWNEWBLK(b, msg) \
      85             : DEBUG( \
      86             :   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
      87             :   dbgs() << "\n"; \
      88             : );
      89             : 
      90             : #define SHOWBLK_DETAIL(b, msg) \
      91             : DEBUG( \
      92             :   if (b) { \
      93             :   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
      94             :   b->print(dbgs()); \
      95             :   dbgs() << "\n"; \
      96             :   } \
      97             : );
      98             : 
      99             : #define INVALIDSCCNUM -1
     100             : 
     101             : //===----------------------------------------------------------------------===//
     102             : //
     103             : // supporting data structure for CFGStructurizer
     104             : //
     105             : //===----------------------------------------------------------------------===//
     106             : 
     107             : class BlockInformation {
     108             : public:
     109             :   bool IsRetired = false;
     110             :   int SccNum = INVALIDSCCNUM;
     111             : 
     112        2179 :   BlockInformation() = default;
     113             : };
     114             : 
     115             : //===----------------------------------------------------------------------===//
     116             : //
     117             : // CFGStructurizer
     118             : //
     119             : //===----------------------------------------------------------------------===//
     120             : 
     121        1458 : class AMDGPUCFGStructurizer : public MachineFunctionPass {
     122             : public:
     123             :   using MBBVector = SmallVector<MachineBasicBlock *, 32>;
     124             :   using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
     125             :   using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
     126             : 
     127             :   enum PathToKind {
     128             :     Not_SinglePath = 0,
     129             :     SinglePath_InPath = 1,
     130             :     SinglePath_NotInPath = 2
     131             :   };
     132             : 
     133             :   static char ID;
     134             : 
     135        1220 :   AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
     136         244 :     initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
     137         244 :   }
     138             : 
     139         244 :   StringRef getPassName() const override {
     140         244 :     return "AMDGPU Control Flow Graph structurizer Pass";
     141             :   }
     142             : 
     143         244 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     144         244 :     AU.addRequired<MachineDominatorTree>();
     145         244 :     AU.addRequired<MachinePostDominatorTree>();
     146         244 :     AU.addRequired<MachineLoopInfo>();
     147         244 :     MachineFunctionPass::getAnalysisUsage(AU);
     148         244 :   }
     149             : 
     150             :   /// Perform the CFG structurization
     151             :   bool run();
     152             : 
     153             :   /// Perform the CFG preparation
     154             :   /// This step will remove every unconditionnal/dead jump instructions and make
     155             :   /// sure all loops have an exit block
     156             :   bool prepare();
     157             : 
     158        2057 :   bool runOnMachineFunction(MachineFunction &MF) override {
     159        4114 :     TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
     160        4114 :     TRI = &TII->getRegisterInfo();
     161             :     DEBUG(MF.dump(););
     162        4114 :     OrderedBlks.clear();
     163        4114 :     Visited.clear();
     164        2057 :     FuncRep = &MF;
     165        2057 :     MLI = &getAnalysis<MachineLoopInfo>();
     166             :     DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
     167        2057 :     MDT = &getAnalysis<MachineDominatorTree>();
     168             :     DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
     169        2057 :     PDT = &getAnalysis<MachinePostDominatorTree>();
     170             :     DEBUG(PDT->print(dbgs()););
     171        2057 :     prepare();
     172        2057 :     run();
     173             :     DEBUG(MF.dump(););
     174        2057 :     return true;
     175             :   }
     176             : 
     177             : protected:
     178             :   MachineDominatorTree *MDT;
     179             :   MachinePostDominatorTree *PDT;
     180             :   MachineLoopInfo *MLI;
     181             :   const R600InstrInfo *TII = nullptr;
     182             :   const R600RegisterInfo *TRI = nullptr;
     183             : 
     184             :   // PRINT FUNCTIONS
     185             :   /// Print the ordered Blocks.
     186             :   void printOrderedBlocks() const {
     187             :     size_t i = 0;
     188             :     for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
     189             :         iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
     190             :       dbgs() << "BB" << (*iterBlk)->getNumber();
     191             :       dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
     192             :       if (i != 0 && i % 10 == 0) {
     193             :         dbgs() << "\n";
     194             :       } else {
     195             :         dbgs() << " ";
     196             :       }
     197             :     }
     198             :   }
     199             : 
     200             :   static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
     201             :     for (MachineLoop::iterator iter = LoopInfo.begin(),
     202             :          iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
     203             :       (*iter)->print(dbgs(), 0);
     204             :     }
     205             :   }
     206             : 
     207             :   // UTILITY FUNCTIONS
     208             :   int getSCCNum(MachineBasicBlock *MBB) const;
     209             :   MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
     210             :   bool hasBackEdge(MachineBasicBlock *MBB) const;
     211             :   bool isRetiredBlock(MachineBasicBlock *MBB) const;
     212             :   bool isActiveLoophead(MachineBasicBlock *MBB) const;
     213             :   PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
     214             :       bool AllowSideEntry = true) const;
     215             :   int countActiveBlock(MBBVector::const_iterator It,
     216             :       MBBVector::const_iterator E) const;
     217             :   bool needMigrateBlock(MachineBasicBlock *MBB) const;
     218             : 
     219             :   // Utility Functions
     220             :   void reversePredicateSetter(MachineBasicBlock::iterator I,
     221             :                               MachineBasicBlock &MBB);
     222             :   /// Compute the reversed DFS post order of Blocks
     223             :   void orderBlocks(MachineFunction *MF);
     224             : 
     225             :   // Function originally from CFGStructTraits
     226             :   void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
     227             :                       const DebugLoc &DL = DebugLoc());
     228             :   MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
     229             :                                   const DebugLoc &DL = DebugLoc());
     230             :   MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
     231             :   void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
     232             :                               const DebugLoc &DL);
     233             :   void insertCondBranchBefore(MachineBasicBlock *MBB,
     234             :                               MachineBasicBlock::iterator I, int NewOpcode,
     235             :                               int RegNum, const DebugLoc &DL);
     236             : 
     237             :   static int getBranchNzeroOpcode(int OldOpcode);
     238             :   static int getBranchZeroOpcode(int OldOpcode);
     239             :   static int getContinueNzeroOpcode(int OldOpcode);
     240             :   static int getContinueZeroOpcode(int OldOpcode);
     241             :   static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
     242             :   static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
     243             :   static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
     244             :       MachineInstr *MI);
     245             :   static bool isCondBranch(MachineInstr *MI);
     246             :   static bool isUncondBranch(MachineInstr *MI);
     247             :   static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
     248             :   static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
     249             : 
     250             :   /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
     251             :   ///
     252             :   /// BB with backward-edge could have move instructions after the branch
     253             :   /// instruction.  Such move instruction "belong to" the loop backward-edge.
     254             :   MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
     255             : 
     256             :   static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
     257             :   static bool isReturnBlock(MachineBasicBlock *MBB);
     258             :   static void cloneSuccessorList(MachineBasicBlock *DstMBB,
     259             :       MachineBasicBlock *SrcMBB);
     260             :   static MachineBasicBlock *clone(MachineBasicBlock *MBB);
     261             : 
     262             :   /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
     263             :   /// because the AMDGPU instruction is not recognized as terminator fix this
     264             :   /// and retire this routine
     265             :   void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
     266             :       MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
     267             : 
     268             :   static void wrapup(MachineBasicBlock *MBB);
     269             : 
     270             :   int patternMatch(MachineBasicBlock *MBB);
     271             :   int patternMatchGroup(MachineBasicBlock *MBB);
     272             :   int serialPatternMatch(MachineBasicBlock *MBB);
     273             :   int ifPatternMatch(MachineBasicBlock *MBB);
     274             :   int loopendPatternMatch();
     275             :   int mergeLoop(MachineLoop *LoopRep);
     276             : 
     277             :   /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
     278             :   /// the same loop with LoopLandInfo without explicitly keeping track of
     279             :   /// loopContBlks and loopBreakBlks, this is a method to get the information.
     280             :   bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
     281             :       MachineBasicBlock *Src2MBB);
     282             :   int handleJumpintoIf(MachineBasicBlock *HeadMBB,
     283             :       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
     284             :   int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
     285             :       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
     286             :   int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
     287             :       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
     288             :       MachineBasicBlock **LandMBBPtr);
     289             :   void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
     290             :       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
     291             :       MachineBasicBlock *LandMBB, bool Detail = false);
     292             :   int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
     293             :       MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
     294             :   void mergeSerialBlock(MachineBasicBlock *DstMBB,
     295             :       MachineBasicBlock *SrcMBB);
     296             : 
     297             :   void mergeIfthenelseBlock(MachineInstr *BranchMI,
     298             :       MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
     299             :       MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
     300             :   void mergeLooplandBlock(MachineBasicBlock *DstMBB,
     301             :       MachineBasicBlock *LandMBB);
     302             :   void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
     303             :       MachineBasicBlock *LandMBB);
     304             :   void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
     305             :       MachineBasicBlock *ContMBB);
     306             : 
     307             :   /// normalizeInfiniteLoopExit change
     308             :   ///   B1:
     309             :   ///        uncond_br LoopHeader
     310             :   ///
     311             :   /// to
     312             :   ///   B1:
     313             :   ///        cond_br 1 LoopHeader dummyExit
     314             :   /// and return the newly added dummy exit block
     315             :   MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
     316             :   void removeUnconditionalBranch(MachineBasicBlock *MBB);
     317             : 
     318             :   /// Remove duplicate branches instructions in a block.
     319             :   /// For instance
     320             :   /// B0:
     321             :   ///    cond_br X B1 B2
     322             :   ///    cond_br X B1 B2
     323             :   /// is transformed to
     324             :   /// B0:
     325             :   ///    cond_br X B1 B2
     326             :   void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
     327             : 
     328             :   void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
     329             :   void removeSuccessor(MachineBasicBlock *MBB);
     330             :   MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
     331             :       MachineBasicBlock *PredMBB);
     332             :   void migrateInstruction(MachineBasicBlock *SrcMBB,
     333             :       MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
     334             :   void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
     335             :   void retireBlock(MachineBasicBlock *MBB);
     336             : 
     337             : private:
     338             :   MBBInfoMap BlockInfoMap;
     339             :   LoopLandInfoMap LLInfoMap;
     340             :   std::map<MachineLoop *, bool> Visited;
     341             :   MachineFunction *FuncRep;
     342             :   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
     343             : };
     344             : 
     345             : } // end anonymous namespace
     346             : 
     347             : char AMDGPUCFGStructurizer::ID = 0;
     348             : 
     349             : int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
     350        4816 :   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
     351        4816 :   if (It == BlockInfoMap.end())
     352             :     return INVALIDSCCNUM;
     353        2408 :   return (*It).second->SccNum;
     354             : }
     355             : 
     356             : MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
     357             :     const {
     358           0 :   LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
     359           0 :   if (It == LLInfoMap.end())
     360             :     return nullptr;
     361           0 :   return (*It).second;
     362             : }
     363             : 
     364          46 : bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
     365          92 :   MachineLoop *LoopRep = MLI->getLoopFor(MBB);
     366          11 :   if (!LoopRep)
     367             :     return false;
     368          22 :   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
     369          11 :   return MBB->isSuccessor(LoopHeader);
     370             : }
     371             : 
     372             : bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
     373       13044 :   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
     374       13044 :   if (It == BlockInfoMap.end())
     375             :     return false;
     376        6522 :   return (*It).second->IsRetired;
     377             : }
     378             : 
     379          74 : bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
     380         148 :   MachineLoop *LoopRep = MLI->getLoopFor(MBB);
     381          87 :   while (LoopRep && LoopRep->getHeader() == MBB) {
     382           0 :     MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
     383           0 :     if(!LoopLand)
     384             :       return true;
     385           0 :     if (!isRetiredBlock(LoopLand))
     386             :       return true;
     387           0 :     LoopRep = LoopRep->getParentLoop();
     388             :   }
     389             :   return false;
     390             : }
     391             : 
     392           4 : AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
     393             :     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
     394             :     bool AllowSideEntry) const {
     395             :   assert(DstMBB);
     396           4 :   if (SrcMBB == DstMBB)
     397             :     return SinglePath_InPath;
     398          20 :   while (SrcMBB && SrcMBB->succ_size() == 1) {
     399          10 :     SrcMBB = *SrcMBB->succ_begin();
     400          10 :     if (SrcMBB == DstMBB)
     401             :       return SinglePath_InPath;
     402           6 :     if (!AllowSideEntry && SrcMBB->pred_size() > 1)
     403             :       return Not_SinglePath;
     404             :   }
     405           0 :   if (SrcMBB && SrcMBB->succ_size()==0)
     406             :     return SinglePath_NotInPath;
     407             :   return Not_SinglePath;
     408             : }
     409             : 
     410        4211 : int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
     411             :     MBBVector::const_iterator E) const {
     412        4211 :   int Count = 0;
     413       12907 :   while (It != E) {
     414        8696 :     if (!isRetiredBlock(*It))
     415        4328 :       ++Count;
     416        4348 :     ++It;
     417             :   }
     418        4211 :   return Count;
     419             : }
     420             : 
     421           2 : bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
     422           2 :   unsigned BlockSizeThreshold = 30;
     423           2 :   unsigned CloneInstrThreshold = 100;
     424           4 :   bool MultiplePreds = MBB && (MBB->pred_size() > 1);
     425             : 
     426             :   if(!MultiplePreds)
     427             :     return false;
     428           1 :   unsigned BlkSize = MBB->size();
     429           2 :   return ((BlkSize > BlockSizeThreshold) &&
     430           1 :       (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
     431             : }
     432             : 
     433          52 : void AMDGPUCFGStructurizer::reversePredicateSetter(
     434             :     MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
     435             :   assert(I.isValid() && "Expected valid iterator");
     436             :   for (;; --I) {
     437         304 :     if (I == MBB.end())
     438             :       continue;
     439         232 :     if (I->getOpcode() == AMDGPU::PRED_X) {
     440          52 :       switch (I->getOperand(2).getImm()) {
     441          14 :       case AMDGPU::PRED_SETE_INT:
     442          14 :         I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
     443             :         return;
     444          38 :       case AMDGPU::PRED_SETNE_INT:
     445          38 :         I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
     446             :         return;
     447           0 :       case AMDGPU::PRED_SETE:
     448           0 :         I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
     449             :         return;
     450           0 :       case AMDGPU::PRED_SETNE:
     451           0 :         I->getOperand(2).setImm(AMDGPU::PRED_SETE);
     452             :         return;
     453           0 :       default:
     454           0 :         llvm_unreachable("PRED_X Opcode invalid!");
     455             :       }
     456             :     }
     457             :   }
     458             : }
     459             : 
     460          36 : void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
     461             :                                            int NewOpcode, const DebugLoc &DL) {
     462             :   MachineInstr *MI =
     463          72 :       MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
     464          36 :   MBB->push_back(MI);
     465             :   //assume the instruction doesn't take any reg operand ...
     466             :   SHOWNEWINSTR(MI);
     467          36 : }
     468             : 
     469          16 : MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
     470             :                                                        int NewOpcode,
     471             :                                                        const DebugLoc &DL) {
     472             :   MachineInstr *MI =
     473          32 :       MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
     474          48 :   if (MBB->begin() != MBB->end())
     475          16 :     MBB->insert(MBB->begin(), MI);
     476             :   else
     477             :     MBB->push_back(MI);
     478             :   SHOWNEWINSTR(MI);
     479          16 :   return MI;
     480             : }
     481             : 
     482          80 : MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
     483             :     MachineBasicBlock::iterator I, int NewOpcode) {
     484          80 :   MachineInstr *OldMI = &(*I);
     485          80 :   MachineBasicBlock *MBB = OldMI->getParent();
     486             :   MachineInstr *NewMBB =
     487         320 :       MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
     488          80 :   MBB->insert(I, NewMBB);
     489             :   //assume the instruction doesn't take any reg operand ...
     490             :   SHOWNEWINSTR(NewMBB);
     491          80 :   return NewMBB;
     492             : }
     493             : 
     494          42 : void AMDGPUCFGStructurizer::insertCondBranchBefore(
     495             :     MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
     496          42 :   MachineInstr *OldMI = &(*I);
     497          42 :   MachineBasicBlock *MBB = OldMI->getParent();
     498          42 :   MachineFunction *MF = MBB->getParent();
     499          84 :   MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
     500          42 :   MBB->insert(I, NewMI);
     501          42 :   MachineInstrBuilder MIB(*MF, NewMI);
     502          42 :   MIB.addReg(OldMI->getOperand(1).getReg(), false);
     503             :   SHOWNEWINSTR(NewMI);
     504             :   //erase later oldInstr->eraseFromParent();
     505          42 : }
     506             : 
     507          16 : void AMDGPUCFGStructurizer::insertCondBranchBefore(
     508             :     MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
     509             :     int RegNum, const DebugLoc &DL) {
     510          16 :   MachineFunction *MF = blk->getParent();
     511          32 :   MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
     512             :   //insert before
     513          16 :   blk->insert(I, NewInstr);
     514          16 :   MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
     515             :   SHOWNEWINSTR(NewInstr);
     516          16 : }
     517             : 
     518             : int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
     519          42 :   switch(OldOpcode) {
     520             :   case AMDGPU::JUMP_COND:
     521             :   case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
     522           0 :   case AMDGPU::BRANCH_COND_i32:
     523             :   case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
     524           0 :   default: llvm_unreachable("internal error");
     525             :   }
     526             :   return -1;
     527             : }
     528             : 
     529             : int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
     530           0 :   switch(OldOpcode) {
     531             :   case AMDGPU::JUMP_COND:
     532             :   case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
     533           0 :   case AMDGPU::BRANCH_COND_i32:
     534             :   case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
     535           0 :   default: llvm_unreachable("internal error");
     536             :   }
     537             :   return -1;
     538             : }
     539             : 
     540             : int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
     541           0 :   switch(OldOpcode) {
     542             :   case AMDGPU::JUMP_COND:
     543             :   case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
     544           0 :   default: llvm_unreachable("internal error");
     545             :   }
     546             :   return -1;
     547             : }
     548             : 
     549             : int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
     550           0 :   switch(OldOpcode) {
     551             :   case AMDGPU::JUMP_COND:
     552             :   case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
     553           0 :   default: llvm_unreachable("internal error");
     554             :   }
     555             :   return -1;
     556             : }
     557             : 
     558             : MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
     559         109 :   return MI->getOperand(0).getMBB();
     560             : }
     561             : 
     562             : void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
     563             :     MachineBasicBlock *MBB) {
     564           2 :   MI->getOperand(0).setMBB(MBB);
     565             : }
     566             : 
     567             : MachineBasicBlock *
     568             : AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
     569             :     MachineInstr *MI) {
     570             :   assert(MBB->succ_size() == 2);
     571          46 :   MachineBasicBlock *TrueBranch = getTrueBranch(MI);
     572          46 :   MachineBasicBlock::succ_iterator It = MBB->succ_begin();
     573          46 :   MachineBasicBlock::succ_iterator Next = It;
     574          46 :   ++Next;
     575          46 :   return (*It == TrueBranch) ? *Next : *It;
     576             : }
     577             : 
     578             : bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
     579        4632 :   switch (MI->getOpcode()) {
     580             :     case AMDGPU::JUMP_COND:
     581             :     case AMDGPU::BRANCH_COND_i32:
     582             :     case AMDGPU::BRANCH_COND_f32: return true;
     583             :   default:
     584             :     return false;
     585             :   }
     586             :   return false;
     587             : }
     588             : 
     589             : bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
     590        2312 :   switch (MI->getOpcode()) {
     591             :   case AMDGPU::JUMP:
     592             :   case AMDGPU::BRANCH:
     593             :     return true;
     594             :   default:
     595             :     return false;
     596             :   }
     597             :   return false;
     598             : }
     599             : 
     600          16 : DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
     601             :   //get DebugLoc from the first MachineBasicBlock instruction with debug info
     602          16 :   DebugLoc DL;
     603         470 :   for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
     604             :       ++It) {
     605         211 :     MachineInstr *instr = &(*It);
     606         422 :     if (instr->getDebugLoc())
     607           0 :       DL = instr->getDebugLoc();
     608             :   }
     609          16 :   return DL;
     610             : }
     611             : 
     612          46 : MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
     613             :     MachineBasicBlock *MBB) {
     614          46 :   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
     615          46 :   MachineInstr *MI = &*It;
     616          46 :   if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
     617             :     return MI;
     618             :   return nullptr;
     619             : }
     620             : 
     621        2208 : MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
     622             :     MachineBasicBlock *MBB) {
     623        4416 :   for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
     624        2272 :       It != E; ++It) {
     625             :     // FIXME: Simplify
     626        2269 :     MachineInstr *MI = &*It;
     627        2269 :     if (MI) {
     628        4387 :       if (isCondBranch(MI) || isUncondBranch(MI))
     629          76 :         return MI;
     630        2193 :       else if (!TII->isMov(MI->getOpcode()))
     631             :         break;
     632             :     }
     633             :   }
     634             :   return nullptr;
     635             : }
     636             : 
     637        2182 : MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
     638        2182 :   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
     639        4364 :   if (It != MBB->rend()) {
     640        2178 :     MachineInstr *instr = &(*It);
     641        2178 :     if (instr->getOpcode() == AMDGPU::RETURN)
     642             :       return instr;
     643             :   }
     644             :   return nullptr;
     645             : }
     646             : 
     647             : bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
     648        2174 :   MachineInstr *MI = getReturnInstr(MBB);
     649        2174 :   bool IsReturn = (MBB->succ_size() == 0);
     650             :   if (MI)
     651             :     assert(IsReturn);
     652             :   else if (IsReturn)
     653             :     DEBUG(
     654             :       dbgs() << "BB" << MBB->getNumber()
     655             :              <<" is return block without RETURN instr\n";);
     656             :   return  IsReturn;
     657             : }
     658             : 
     659             : void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
     660             :     MachineBasicBlock *SrcMBB) {
     661          75 :   for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
     662         170 :        iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
     663          20 :     DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
     664             : }
     665             : 
     666           1 : MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
     667           1 :   MachineFunction *Func = MBB->getParent();
     668           1 :   MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
     669           1 :   Func->push_back(NewMBB);  //insert to function
     670         264 :   for (const MachineInstr &It : *MBB)
     671         260 :     NewMBB->push_back(Func->CloneMachineInstr(&It));
     672           1 :   return NewMBB;
     673             : }
     674             : 
     675           1 : void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
     676             :     MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
     677             :     MachineBasicBlock *NewBlk) {
     678           1 :   MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
     679           3 :   if (BranchMI && isCondBranch(BranchMI) &&
     680           1 :       getTrueBranch(BranchMI) == OldMBB)
     681             :     setTrueBranch(BranchMI, NewBlk);
     682           1 : }
     683             : 
     684        2057 : void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
     685             :   assert((!MBB->getParent()->getJumpTableInfo()
     686             :           || MBB->getParent()->getJumpTableInfo()->isEmpty())
     687             :          && "found a jump table");
     688             : 
     689             :    //collect continue right before endloop
     690        4114 :    SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
     691        2057 :    MachineBasicBlock::iterator Pre = MBB->begin();
     692        2057 :    MachineBasicBlock::iterator E = MBB->end();
     693        2057 :    MachineBasicBlock::iterator It = Pre;
     694       59696 :    while (It != E) {
     695       57639 :      if (Pre->getOpcode() == AMDGPU::CONTINUE
     696       57671 :          && It->getOpcode() == AMDGPU::ENDLOOP)
     697          16 :        ContInstr.push_back(&*Pre);
     698       57639 :      Pre = It;
     699             :      ++It;
     700             :    }
     701             : 
     702             :    //delete continue right before endloop
     703        4162 :    for (unsigned i = 0; i < ContInstr.size(); ++i)
     704          32 :       ContInstr[i]->eraseFromParent();
     705             : 
     706             :    // TODO to fix up jump table so later phase won't be confused.  if
     707             :    // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
     708             :    // there isn't such an interface yet.  alternatively, replace all the other
     709             :    // blocks in the jump table with the entryBlk //}
     710        2057 : }
     711             : 
     712        2057 : bool AMDGPUCFGStructurizer::prepare() {
     713        2057 :   bool Changed = false;
     714             : 
     715             :   //FIXME: if not reducible flow graph, make it so ???
     716             : 
     717             :   DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
     718             : 
     719        2057 :   orderBlocks(FuncRep);
     720             : 
     721        4114 :   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
     722             : 
     723             :   // Add an ExitBlk to loop that don't have one
     724        4114 :   for (MachineLoopInfo::iterator It = MLI->begin(),
     725        6185 :        E = MLI->end(); It != E; ++It) {
     726          14 :     MachineLoop *LoopRep = (*It);
     727          28 :     MBBVector ExitingMBBs;
     728          14 :     LoopRep->getExitingBlocks(ExitingMBBs);
     729             : 
     730          14 :     if (ExitingMBBs.size() == 0) {
     731           0 :       MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
     732           0 :       if (DummyExitBlk)
     733           0 :         RetBlks.push_back(DummyExitBlk);
     734             :     }
     735             :   }
     736             : 
     737             :   // Remove unconditional branch instr.
     738             :   // Add dummy exit block iff there are multiple returns.
     739        2174 :   for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
     740        6171 :        It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
     741        2174 :     MachineBasicBlock *MBB = *It;
     742        2174 :     removeUnconditionalBranch(MBB);
     743        2174 :     removeRedundantConditionalBranch(MBB);
     744        4348 :     if (isReturnBlock(MBB)) {
     745        2061 :       RetBlks.push_back(MBB);
     746             :     }
     747             :     assert(MBB->succ_size() <= 2);
     748             :   }
     749             : 
     750        2057 :   if (RetBlks.size() >= 2) {
     751           4 :     addDummyExitBlock(RetBlks);
     752           4 :     Changed = true;
     753             :   }
     754             : 
     755        4114 :   return Changed;
     756             : }
     757             : 
     758        2057 : bool AMDGPUCFGStructurizer::run() {
     759             :   //Assume reducible CFG...
     760             :   DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
     761             : 
     762             : #ifdef STRESSTEST
     763             :   //Use the worse block ordering to test the algorithm.
     764             :   ReverseVector(orderedBlks);
     765             : #endif
     766             : 
     767             :   DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
     768        2057 :   int NumIter = 0;
     769        2057 :   bool Finish = false;
     770             :   MachineBasicBlock *MBB;
     771        2057 :   bool MakeProgress = false;
     772        4114 :   int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
     773        6171 :                                         OrderedBlks.end());
     774             : 
     775             :   do {
     776        2057 :     ++NumIter;
     777             :     DEBUG(
     778             :       dbgs() << "numIter = " << NumIter
     779             :              << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
     780             :     );
     781             : 
     782             :     SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
     783        4114 :         OrderedBlks.begin();
     784             :     SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
     785        4114 :         OrderedBlks.end();
     786             : 
     787        2057 :     SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
     788             :         It;
     789        2057 :     MachineBasicBlock *SccBeginMBB = nullptr;
     790        2057 :     int SccNumBlk = 0;  // The number of active blocks, init to a
     791             :                         // maximum possible number.
     792             :     int SccNumIter;     // Number of iteration in this SCC.
     793             : 
     794        4231 :     while (It != E) {
     795        2174 :       MBB = *It;
     796             : 
     797        2174 :       if (!SccBeginMBB) {
     798        2154 :         SccBeginIter = It;
     799        2154 :         SccBeginMBB = MBB;
     800        2154 :         SccNumIter = 0;
     801        2154 :         SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
     802             :         DEBUG(
     803             :               dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
     804             :               dbgs() << "\n";
     805             :         );
     806             :       }
     807             : 
     808        2174 :       if (!isRetiredBlock(MBB))
     809             :         patternMatch(MBB);
     810             : 
     811        2174 :       ++It;
     812             : 
     813        2174 :       bool ContNextScc = true;
     814             :       if (It == E
     815        2408 :           || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
     816             :         // Just finish one scc.
     817        2154 :         ++SccNumIter;
     818        2154 :         int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
     819        2154 :         if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
     820             :           DEBUG(
     821             :             dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
     822             :                    << ", sccNumIter = " << SccNumIter;
     823             :             dbgs() << "doesn't make any progress\n";
     824             :           );
     825             :           ContNextScc = true;
     826        2154 :         } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
     827           0 :           SccNumBlk = sccRemainedNumBlk;
     828           0 :           It = SccBeginIter;
     829           0 :           ContNextScc = false;
     830             :           DEBUG(
     831             :             dbgs() << "repeat processing SCC" << getSCCNum(MBB)
     832             :                    << "sccNumIter = " << SccNumIter << '\n';
     833             :           );
     834             :         } else {
     835             :           // Finish the current scc.
     836             :           ContNextScc = true;
     837             :         }
     838             :       } else {
     839             :         // Continue on next component in the current scc.
     840             :         ContNextScc = false;
     841             :       }
     842             : 
     843        2174 :       if (ContNextScc)
     844        2154 :         SccBeginMBB = nullptr;
     845             :     } //while, "one iteration" over the function.
     846             : 
     847             :     MachineBasicBlock *EntryMBB =
     848        6171 :         *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
     849        2057 :     if (EntryMBB->succ_size() == 0) {
     850             :       Finish = true;
     851             :       DEBUG(
     852             :         dbgs() << "Reduce to one block\n";
     853             :       );
     854             :     } else {
     855             :       int NewnumRemainedBlk
     856           0 :         = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
     857             :       // consider cloned blocks ??
     858           0 :       if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
     859             :         MakeProgress = true;
     860             :         NumRemainedBlk = NewnumRemainedBlk;
     861             :       } else {
     862           0 :         MakeProgress = false;
     863             :         DEBUG(
     864             :           dbgs() << "No progress\n";
     865             :         );
     866             :       }
     867             :     }
     868        2057 :   } while (!Finish && MakeProgress);
     869             : 
     870             :   // Misc wrap up to maintain the consistency of the Function representation.
     871        6171 :   wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
     872             : 
     873             :   // Detach retired Block, release memory.
     874        6171 :   for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
     875        4236 :       It != E; ++It) {
     876        4358 :     if ((*It).second && (*It).second->IsRetired) {
     877             :       assert(((*It).first)->getNumber() != -1);
     878             :       DEBUG(
     879             :         dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
     880             :       );
     881         122 :       (*It).first->eraseFromParent();  //Remove from the parent Function.
     882             :     }
     883        2179 :     delete (*It).second;
     884             :   }
     885        4114 :   BlockInfoMap.clear();
     886        4114 :   LLInfoMap.clear();
     887             : 
     888        2057 :   if (!Finish) {
     889             :     DEBUG(FuncRep->viewCFG());
     890           0 :     report_fatal_error("IRREDUCIBLE_CFG");
     891             :   }
     892             : 
     893        2057 :   return true;
     894             : }
     895             : 
     896        2057 : void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
     897        2057 :   int SccNum = 0;
     898             :   MachineBasicBlock *MBB;
     899        8422 :   for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
     900             :        ++It, ++SccNum) {
     901             :     const std::vector<MachineBasicBlock *> &SccNext = *It;
     902             :     for (std::vector<MachineBasicBlock *>::const_iterator
     903             :          blockIter = SccNext.begin(), blockEnd = SccNext.end();
     904        4328 :          blockIter != blockEnd; ++blockIter) {
     905        2174 :       MBB = *blockIter;
     906        2174 :       OrderedBlks.push_back(MBB);
     907        2174 :       recordSccnum(MBB, SccNum);
     908             :     }
     909             :   }
     910             : 
     911             :   // walk through all the block in func to check for unreachable
     912       10519 :   for (auto *MBB : nodes(MF)) {
     913        2174 :     SccNum = getSCCNum(MBB);
     914        2174 :     if (SccNum == INVALIDSCCNUM)
     915           0 :       dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
     916             :   }
     917        2057 : }
     918             : 
     919             : int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
     920             :   int NumMatch = 0;
     921             :   int CurMatch;
     922             : 
     923             :   DEBUG(
     924             :         dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
     925             :   );
     926             : 
     927        2264 :   while ((CurMatch = patternMatchGroup(MBB)) > 0)
     928             :     NumMatch += CurMatch;
     929             : 
     930             :   DEBUG(
     931             :         dbgs() << "End patternMatch BB" << MBB->getNumber()
     932             :       << ", numMatch = " << NumMatch << "\n";
     933             :   );
     934             : 
     935             :   return NumMatch;
     936             : }
     937             : 
     938        2264 : int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
     939        2264 :   int NumMatch = 0;
     940        2264 :   NumMatch += loopendPatternMatch();
     941        2264 :   NumMatch += serialPatternMatch(MBB);
     942        2264 :   NumMatch += ifPatternMatch(MBB);
     943        2264 :   return NumMatch;
     944             : }
     945             : 
     946        2390 : int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
     947        2390 :   if (MBB->succ_size() != 1)
     948             :     return 0;
     949             : 
     950         198 :   MachineBasicBlock *childBlk = *MBB->succ_begin();
     951         198 :   if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
     952             :     return 0;
     953             : 
     954          74 :   mergeSerialBlock(MBB, childBlk);
     955          74 :   ++numSerialPatternMatch;
     956          74 :   return 1;
     957             : }
     958             : 
     959        2386 : int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
     960             :   //two edges
     961        2386 :   if (MBB->succ_size() != 2)
     962             :     return 0;
     963          46 :   if (hasBackEdge(MBB))
     964             :     return 0;
     965          46 :   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
     966          46 :   if (!BranchMI)
     967             :     return 0;
     968             : 
     969             :   assert(isCondBranch(BranchMI));
     970          46 :   int NumMatch = 0;
     971             : 
     972          46 :   MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
     973          46 :   NumMatch += serialPatternMatch(TrueMBB);
     974          46 :   NumMatch += ifPatternMatch(TrueMBB);
     975          46 :   MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
     976          46 :   NumMatch += serialPatternMatch(FalseMBB);
     977          46 :   NumMatch += ifPatternMatch(FalseMBB);
     978             :   MachineBasicBlock *LandBlk;
     979          46 :   int Cloned = 0;
     980             : 
     981             :   assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
     982             :   // TODO: Simplify
     983         128 :   if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
     984         100 :     && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
     985             :     // Diamond pattern
     986          12 :     LandBlk = *TrueMBB->succ_begin();
     987          92 :   } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
     988             :     // Triangle pattern, false is empty
     989           0 :     LandBlk = FalseMBB;
     990           0 :     FalseMBB = nullptr;
     991          80 :   } else if (FalseMBB->succ_size() == 1
     992         120 :              && *FalseMBB->succ_begin() == TrueMBB) {
     993             :     // Triangle pattern, true is empty
     994             :     // We reverse the predicate to make a triangle, empty false pattern;
     995          36 :     std::swap(TrueMBB, FalseMBB);
     996          36 :     reversePredicateSetter(MBB->end(), *MBB);
     997          36 :     LandBlk = FalseMBB;
     998          36 :     FalseMBB = nullptr;
     999           8 :   } else if (FalseMBB->succ_size() == 1
    1000           8 :              && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
    1001           0 :     LandBlk = *FalseMBB->succ_begin();
    1002           8 :   } else if (TrueMBB->succ_size() == 1
    1003           8 :     && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
    1004           0 :     LandBlk = *TrueMBB->succ_begin();
    1005             :   } else {
    1006           4 :     return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
    1007             :   }
    1008             : 
    1009             :   // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
    1010             :   // new BB created for landBlk==NULL may introduce new challenge to the
    1011             :   // reduction process.
    1012          42 :   if (LandBlk &&
    1013          84 :       ((TrueMBB && TrueMBB->pred_size() > 1)
    1014          88 :       || (FalseMBB && FalseMBB->pred_size() > 1))) {
    1015           1 :      Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
    1016             :   }
    1017             : 
    1018          84 :   if (TrueMBB && TrueMBB->pred_size() > 1) {
    1019           1 :     TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
    1020           1 :     ++Cloned;
    1021             :   }
    1022             : 
    1023          48 :   if (FalseMBB && FalseMBB->pred_size() > 1) {
    1024           0 :     FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
    1025           0 :     ++Cloned;
    1026             :   }
    1027             : 
    1028          42 :   mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
    1029             : 
    1030          42 :   ++numIfPatternMatch;
    1031             : 
    1032          42 :   numClonedBlock += Cloned;
    1033             : 
    1034          42 :   return 1 + Cloned + NumMatch;
    1035             : }
    1036             : 
    1037        2264 : int AMDGPUCFGStructurizer::loopendPatternMatch() {
    1038        4528 :   std::deque<MachineLoop *> NestedLoops;
    1039        9148 :   for (auto &It: *MLI)
    1040         668 :     for (MachineLoop *ML : depth_first(It))
    1041         104 :       NestedLoops.push_front(ML);
    1042             : 
    1043        2264 :   if (NestedLoops.empty())
    1044             :     return 0;
    1045             : 
    1046             :   // Process nested loop outside->inside (we did push_front),
    1047             :   // so "continue" to a outside loop won't be mistaken as "break"
    1048             :   // of the current loop.
    1049          92 :   int Num = 0;
    1050         380 :   for (MachineLoop *ExaminedLoop : NestedLoops) {
    1051         208 :     if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
    1052          88 :       continue;
    1053             :     DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
    1054          16 :     int NumBreak = mergeLoop(ExaminedLoop);
    1055          16 :     if (NumBreak == -1)
    1056             :       break;
    1057          16 :     Num += NumBreak;
    1058             :   }
    1059          92 :   return Num;
    1060             : }
    1061             : 
    1062          16 : int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
    1063          32 :   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
    1064          32 :   MBBVector ExitingMBBs;
    1065          16 :   LoopRep->getExitingBlocks(ExitingMBBs);
    1066             :   assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
    1067             :   DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
    1068             :   // We assume a single ExitBlk
    1069          32 :   MBBVector ExitBlks;
    1070          16 :   LoopRep->getExitBlocks(ExitBlks);
    1071          32 :   SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
    1072          48 :   for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
    1073          32 :     ExitBlkSet.insert(ExitBlks[i]);
    1074             :   assert(ExitBlkSet.size() == 1);
    1075          16 :   MachineBasicBlock *ExitBlk = *ExitBlks.begin();
    1076             :   assert(ExitBlk && "Loop has several exit block");
    1077          32 :   MBBVector LatchBlks;
    1078          80 :   for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
    1079          64 :     if (LoopRep->contains(LB))
    1080          16 :       LatchBlks.push_back(LB);
    1081             : 
    1082          48 :   for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
    1083          32 :     mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
    1084          48 :   for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
    1085          32 :     settleLoopcontBlock(LatchBlks[i], LoopHeader);
    1086             :   int Match = 0;
    1087             :   do {
    1088          26 :     Match = 0;
    1089          26 :     Match += serialPatternMatch(LoopHeader);
    1090          26 :     Match += ifPatternMatch(LoopHeader);
    1091          26 :   } while (Match > 0);
    1092          16 :   mergeLooplandBlock(LoopHeader, ExitBlk);
    1093          16 :   MachineLoop *ParentLoop = LoopRep->getParentLoop();
    1094          16 :   if (ParentLoop)
    1095           2 :     MLI->changeLoopFor(LoopHeader, ParentLoop);
    1096             :   else
    1097          14 :     MLI->removeBlock(LoopHeader);
    1098          16 :   Visited[LoopRep] = true;
    1099          32 :   return 1;
    1100             : }
    1101             : 
    1102           8 : bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
    1103             :     MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
    1104           8 :   if (Src1MBB->succ_size() == 0) {
    1105           0 :     MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
    1106           0 :     if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
    1107           0 :       MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
    1108           0 :       if (TheEntry) {
    1109             :         DEBUG(
    1110             :           dbgs() << "isLoopContBreakBlock yes src1 = BB"
    1111             :                  << Src1MBB->getNumber()
    1112             :                  << " src2 = BB" << Src2MBB->getNumber() << "\n";
    1113             :         );
    1114           0 :         return true;
    1115             :       }
    1116             :     }
    1117             :   }
    1118             :   return false;
    1119             : }
    1120             : 
    1121           4 : int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
    1122             :     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
    1123           4 :   int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
    1124           4 :   if (Num == 0) {
    1125             :     DEBUG(
    1126             :       dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
    1127             :     );
    1128           0 :     Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
    1129             :   }
    1130           4 :   return Num;
    1131             : }
    1132             : 
    1133           4 : int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
    1134             :     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
    1135           4 :   int Num = 0;
    1136             :   MachineBasicBlock *DownBlk;
    1137             : 
    1138             :   //trueBlk could be the common post dominator
    1139           4 :   DownBlk = TrueMBB;
    1140             : 
    1141             :   DEBUG(
    1142             :     dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
    1143             :            << " true = BB" << TrueMBB->getNumber()
    1144             :            << ", numSucc=" << TrueMBB->succ_size()
    1145             :            << " false = BB" << FalseMBB->getNumber() << "\n";
    1146             :   );
    1147             : 
    1148           4 :   while (DownBlk) {
    1149             :     DEBUG(
    1150             :       dbgs() << "check down = BB" << DownBlk->getNumber();
    1151             :     );
    1152             : 
    1153           4 :     if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
    1154             :       DEBUG(
    1155             :         dbgs() << " working\n";
    1156             :       );
    1157             : 
    1158           4 :       Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
    1159           4 :       Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
    1160             : 
    1161           4 :       numClonedBlock += Num;
    1162           4 :       Num += serialPatternMatch(*HeadMBB->succ_begin());
    1163           8 :       Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
    1164           4 :       Num += ifPatternMatch(HeadMBB);
    1165             :       assert(Num > 0);
    1166             : 
    1167           4 :       break;
    1168             :     }
    1169             :     DEBUG(
    1170             :       dbgs() << " not working\n";
    1171             :     );
    1172           0 :     DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
    1173             :   } // walk down the postDomTree
    1174             : 
    1175           4 :   return Num;
    1176             : }
    1177             : 
    1178             : #ifndef NDEBUG
    1179             : void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
    1180             :     MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
    1181             :     MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
    1182             :   dbgs() << "head = BB" << HeadMBB->getNumber()
    1183             :          << " size = " << HeadMBB->size();
    1184             :   if (Detail) {
    1185             :     dbgs() << "\n";
    1186             :     HeadMBB->print(dbgs());
    1187             :     dbgs() << "\n";
    1188             :   }
    1189             : 
    1190             :   if (TrueMBB) {
    1191             :     dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
    1192             :            << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
    1193             :     if (Detail) {
    1194             :       dbgs() << "\n";
    1195             :       TrueMBB->print(dbgs());
    1196             :       dbgs() << "\n";
    1197             :     }
    1198             :   }
    1199             :   if (FalseMBB) {
    1200             :     dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
    1201             :            << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
    1202             :     if (Detail) {
    1203             :       dbgs() << "\n";
    1204             :       FalseMBB->print(dbgs());
    1205             :       dbgs() << "\n";
    1206             :     }
    1207             :   }
    1208             :   if (LandMBB) {
    1209             :     dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
    1210             :            << LandMBB->size() << " numPred = " << LandMBB->pred_size();
    1211             :     if (Detail) {
    1212             :       dbgs() << "\n";
    1213             :       LandMBB->print(dbgs());
    1214             :       dbgs() << "\n";
    1215             :     }
    1216             :   }
    1217             : 
    1218             :   dbgs() << "\n";
    1219             : }
    1220             : #endif
    1221             : 
    1222           1 : int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    1223             :     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    1224             :     MachineBasicBlock **LandMBBPtr) {
    1225           1 :   bool MigrateTrue = false;
    1226           1 :   bool MigrateFalse = false;
    1227             : 
    1228           1 :   MachineBasicBlock *LandBlk = *LandMBBPtr;
    1229             : 
    1230             :   assert((!TrueMBB || TrueMBB->succ_size() <= 1)
    1231             :          && (!FalseMBB || FalseMBB->succ_size() <= 1));
    1232             : 
    1233           1 :   if (TrueMBB == FalseMBB)
    1234             :     return 0;
    1235             : 
    1236           1 :   MigrateTrue = needMigrateBlock(TrueMBB);
    1237           1 :   MigrateFalse = needMigrateBlock(FalseMBB);
    1238             : 
    1239           1 :   if (!MigrateTrue && !MigrateFalse)
    1240             :     return 0;
    1241             : 
    1242             :   // If we need to migrate either trueBlk and falseBlk, migrate the rest that
    1243             :   // have more than one predecessors.  without doing this, its predecessor
    1244             :   // rather than headBlk will have undefined value in initReg.
    1245           1 :   if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
    1246             :     MigrateTrue = true;
    1247           2 :   if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
    1248             :     MigrateFalse = true;
    1249             : 
    1250             :   DEBUG(
    1251             :     dbgs() << "before improveSimpleJumpintoIf: ";
    1252             :     showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
    1253             :   );
    1254             : 
    1255             :   // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
    1256             :   //
    1257             :   // new: headBlk => if () {initReg = 1; org trueBlk branch} else
    1258             :   //      {initReg = 0; org falseBlk branch }
    1259             :   //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
    1260             :   //      => org landBlk
    1261             :   //      if landBlk->pred_size() > 2, put the about if-else inside
    1262             :   //      if (initReg !=2) {...}
    1263             :   //
    1264             :   // add initReg = initVal to headBlk
    1265             : 
    1266           2 :   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
    1267           1 :   if (!MigrateTrue || !MigrateFalse) {
    1268             :     // XXX: We have an opportunity here to optimize the "branch into if" case
    1269             :     // here.  Branch into if looks like this:
    1270             :     //                        entry
    1271             :     //                       /     |
    1272             :     //           diamond_head       branch_from
    1273             :     //             /      \           |
    1274             :     // diamond_false        diamond_true
    1275             :     //             \      /
    1276             :     //               done
    1277             :     //
    1278             :     // The diamond_head block begins the "if" and the diamond_true block
    1279             :     // is the block being "branched into".
    1280             :     //
    1281             :     // If MigrateTrue is true, then TrueBB is the block being "branched into"
    1282             :     // and if MigrateFalse is true, then FalseBB is the block being
    1283             :     // "branched into"
    1284             :     //
    1285             :     // Here is the pseudo code for how I think the optimization should work:
    1286             :     // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
    1287             :     // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
    1288             :     // 3. Move the branch instruction from diamond_head into its own basic
    1289             :     //    block (new_block).
    1290             :     // 4. Add an unconditional branch from diamond_head to new_block
    1291             :     // 5. Replace the branch instruction in branch_from with an unconditional
    1292             :     //    branch to new_block.  If branch_from has multiple predecessors, then
    1293             :     //    we need to replace the True/False block in the branch
    1294             :     //    instruction instead of replacing it.
    1295             :     // 6. Change the condition of the branch instruction in new_block from
    1296             :     //    COND to (COND || GPR0)
    1297             :     //
    1298             :     // In order insert these MOV instruction, we will need to use the
    1299             :     // RegisterScavenger.  Usually liveness stops being tracked during
    1300             :     // the late machine optimization passes, however if we implement
    1301             :     // bool TargetRegisterInfo::requiresRegisterScavenging(
    1302             :     //                                                const MachineFunction &MF)
    1303             :     // and have it return true, liveness will be tracked correctly
    1304             :     // by generic optimization passes.  We will also need to make sure that
    1305             :     // all of our target-specific passes that run after regalloc and before
    1306             :     // the CFGStructurizer track liveness and we will need to modify this pass
    1307             :     // to correctly track liveness.
    1308             :     //
    1309             :     // After the above changes, the new CFG should look like this:
    1310             :     //                        entry
    1311             :     //                       /     |
    1312             :     //           diamond_head       branch_from
    1313             :     //                       \     /
    1314             :     //                      new_block
    1315             :     //                      /      |
    1316             :     //         diamond_false        diamond_true
    1317             :     //                      \      /
    1318             :     //                        done
    1319             :     //
    1320             :     // Without this optimization, we are forced to duplicate the diamond_true
    1321             :     // block and we will end up with a CFG like this:
    1322             :     //
    1323             :     //                        entry
    1324             :     //                       /     |
    1325             :     //           diamond_head       branch_from
    1326             :     //             /      \                   |
    1327             :     // diamond_false        diamond_true      diamond_true (duplicate)
    1328             :     //             \      /                   |
    1329             :     //               done --------------------|
    1330             :     //
    1331             :     // Duplicating diamond_true can be very costly especially if it has a
    1332             :     // lot of instructions.
    1333             :     return 0;
    1334             :   }
    1335             : 
    1336           0 :   int NumNewBlk = 0;
    1337             : 
    1338           0 :   bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
    1339             : 
    1340             :   //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
    1341           0 :   MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
    1342             : 
    1343           0 :   if (LandBlkHasOtherPred) {
    1344           0 :     report_fatal_error("Extra register needed to handle CFG");
    1345             :     unsigned CmpResReg =
    1346             :       HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
    1347             :     report_fatal_error("Extra compare instruction needed to handle CFG");
    1348             :     insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
    1349             :         CmpResReg, DebugLoc());
    1350             :   }
    1351             : 
    1352             :   // XXX: We are running this after RA, so creating virtual registers will
    1353             :   // cause an assertion failure in the PostRA scheduling pass.
    1354             :   unsigned InitReg =
    1355           0 :     HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
    1356           0 :   insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
    1357           0 :       DebugLoc());
    1358             : 
    1359           0 :   if (MigrateTrue) {
    1360           0 :     migrateInstruction(TrueMBB, LandBlk, I);
    1361             :     // need to uncondionally insert the assignment to ensure a path from its
    1362             :     // predecessor rather than headBlk has valid value in initReg if
    1363             :     // (initVal != 1).
    1364           0 :     report_fatal_error("Extra register needed to handle CFG");
    1365             :   }
    1366           0 :   insertInstrBefore(I, AMDGPU::ELSE);
    1367             : 
    1368           0 :   if (MigrateFalse) {
    1369           0 :     migrateInstruction(FalseMBB, LandBlk, I);
    1370             :     // need to uncondionally insert the assignment to ensure a path from its
    1371             :     // predecessor rather than headBlk has valid value in initReg if
    1372             :     // (initVal != 0)
    1373           0 :     report_fatal_error("Extra register needed to handle CFG");
    1374             :   }
    1375             : 
    1376             :   if (LandBlkHasOtherPred) {
    1377             :     // add endif
    1378             :     insertInstrBefore(I, AMDGPU::ENDIF);
    1379             : 
    1380             :     // put initReg = 2 to other predecessors of landBlk
    1381             :     for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
    1382             :          PE = LandBlk->pred_end(); PI != PE; ++PI) {
    1383             :       MachineBasicBlock *MBB = *PI;
    1384             :       if (MBB != TrueMBB && MBB != FalseMBB)
    1385             :         report_fatal_error("Extra register needed to handle CFG");
    1386             :     }
    1387             :   }
    1388             :   DEBUG(
    1389             :     dbgs() << "result from improveSimpleJumpintoIf: ";
    1390             :     showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
    1391             :   );
    1392             : 
    1393             :   // update landBlk
    1394           0 :   *LandMBBPtr = LandBlk;
    1395             : 
    1396           0 :   return NumNewBlk;
    1397             : }
    1398             : 
    1399          74 : void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
    1400             :     MachineBasicBlock *SrcMBB) {
    1401             :   DEBUG(
    1402             :     dbgs() << "serialPattern BB" << DstMBB->getNumber()
    1403             :            << " <= BB" << SrcMBB->getNumber() << "\n";
    1404             :   );
    1405         296 :   DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
    1406             : 
    1407          74 :   DstMBB->removeSuccessor(SrcMBB, true);
    1408          74 :   cloneSuccessorList(DstMBB, SrcMBB);
    1409             : 
    1410          74 :   removeSuccessor(SrcMBB);
    1411         148 :   MLI->removeBlock(SrcMBB);
    1412          74 :   retireBlock(SrcMBB);
    1413          74 : }
    1414             : 
    1415          42 : void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
    1416             :     MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
    1417             :     MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
    1418             :   assert (TrueMBB);
    1419             :   DEBUG(
    1420             :     dbgs() << "ifPattern BB" << MBB->getNumber();
    1421             :     dbgs() << "{  ";
    1422             :     if (TrueMBB) {
    1423             :       dbgs() << "BB" << TrueMBB->getNumber();
    1424             :     }
    1425             :     dbgs() << "  } else ";
    1426             :     dbgs() << "{  ";
    1427             :     if (FalseMBB) {
    1428             :       dbgs() << "BB" << FalseMBB->getNumber();
    1429             :     }
    1430             :     dbgs() << "  }\n ";
    1431             :     dbgs() << "landBlock: ";
    1432             :     if (!LandMBB) {
    1433             :       dbgs() << "NULL";
    1434             :     } else {
    1435             :       dbgs() << "BB" << LandMBB->getNumber();
    1436             :     }
    1437             :     dbgs() << "\n";
    1438             :   );
    1439             : 
    1440          84 :   int OldOpcode = BranchMI->getOpcode();
    1441         126 :   DebugLoc BranchDL = BranchMI->getDebugLoc();
    1442             : 
    1443             : //    transform to
    1444             : //    if cond
    1445             : //       trueBlk
    1446             : //    else
    1447             : //       falseBlk
    1448             : //    endif
    1449             : //    landBlk
    1450             : 
    1451          42 :   MachineBasicBlock::iterator I = BranchMI;
    1452          42 :   insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
    1453             :       BranchDL);
    1454             : 
    1455          42 :   if (TrueMBB) {
    1456         126 :     MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
    1457          42 :     MBB->removeSuccessor(TrueMBB, true);
    1458          84 :     if (LandMBB && TrueMBB->succ_size()!=0)
    1459          42 :       TrueMBB->removeSuccessor(LandMBB, true);
    1460          42 :     retireBlock(TrueMBB);
    1461          42 :     MLI->removeBlock(TrueMBB);
    1462             :   }
    1463             : 
    1464          42 :   if (FalseMBB) {
    1465           6 :     insertInstrBefore(I, AMDGPU::ELSE);
    1466          18 :     MBB->splice(I, FalseMBB, FalseMBB->begin(),
    1467             :                    FalseMBB->end());
    1468           6 :     MBB->removeSuccessor(FalseMBB, true);
    1469          12 :     if (LandMBB && FalseMBB->succ_size() != 0)
    1470           6 :       FalseMBB->removeSuccessor(LandMBB, true);
    1471           6 :     retireBlock(FalseMBB);
    1472           6 :     MLI->removeBlock(FalseMBB);
    1473             :   }
    1474          42 :   insertInstrBefore(I, AMDGPU::ENDIF);
    1475             : 
    1476          42 :   BranchMI->eraseFromParent();
    1477             : 
    1478          42 :   if (LandMBB && TrueMBB && FalseMBB)
    1479           6 :     MBB->addSuccessor(LandMBB);
    1480          42 : }
    1481             : 
    1482          16 : void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
    1483             :     MachineBasicBlock *LandMBB) {
    1484             :   DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
    1485             :                << " land = BB" << LandMBB->getNumber() << "\n";);
    1486             : 
    1487          48 :   insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
    1488          48 :   insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
    1489          16 :   DstBlk->replaceSuccessor(DstBlk, LandMBB);
    1490          16 : }
    1491             : 
    1492          16 : void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
    1493             :     MachineBasicBlock *LandMBB) {
    1494             :   DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
    1495             :                << " land = BB" << LandMBB->getNumber() << "\n";);
    1496          16 :   MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
    1497             :   assert(BranchMI && isCondBranch(BranchMI));
    1498          48 :   DebugLoc DL = BranchMI->getDebugLoc();
    1499          16 :   MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
    1500          16 :   MachineBasicBlock::iterator I = BranchMI;
    1501          16 :   if (TrueBranch != LandMBB)
    1502          16 :     reversePredicateSetter(I, *I->getParent());
    1503          16 :   insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
    1504          16 :   insertInstrBefore(I, AMDGPU::BREAK);
    1505          16 :   insertInstrBefore(I, AMDGPU::ENDIF);
    1506             :   //now branchInst can be erase safely
    1507          16 :   BranchMI->eraseFromParent();
    1508             :   //now take care of successors, retire blocks
    1509          16 :   ExitingMBB->removeSuccessor(LandMBB, true);
    1510          16 : }
    1511             : 
    1512          16 : void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
    1513             :     MachineBasicBlock *ContMBB) {
    1514             :   DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
    1515             :                << ContingMBB->getNumber()
    1516             :                << ", cont = BB" << ContMBB->getNumber() << "\n";);
    1517             : 
    1518          16 :   MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
    1519          16 :   if (MI) {
    1520             :     assert(isCondBranch(MI));
    1521           0 :     MachineBasicBlock::iterator I = MI;
    1522           0 :     MachineBasicBlock *TrueBranch = getTrueBranch(MI);
    1523           0 :     int OldOpcode = MI->getOpcode();
    1524           0 :     DebugLoc DL = MI->getDebugLoc();
    1525             : 
    1526           0 :     bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
    1527             : 
    1528           0 :     if (!UseContinueLogical) {
    1529             :       int BranchOpcode =
    1530           0 :           TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
    1531           0 :           getBranchZeroOpcode(OldOpcode);
    1532           0 :       insertCondBranchBefore(I, BranchOpcode, DL);
    1533             :       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
    1534           0 :       insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
    1535           0 :       insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
    1536             :     } else {
    1537             :       int BranchOpcode =
    1538           0 :           TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
    1539           0 :           getContinueZeroOpcode(OldOpcode);
    1540           0 :       insertCondBranchBefore(I, BranchOpcode, DL);
    1541             :     }
    1542             : 
    1543           0 :     MI->eraseFromParent();
    1544             :   } else {
    1545             :     // if we've arrived here then we've already erased the branch instruction
    1546             :     // travel back up the basic block to see the last reference of our debug
    1547             :     // location we've just inserted that reference here so it should be
    1548             :     // representative insertEnd to ensure phi-moves, if exist, go before the
    1549             :     // continue-instr.
    1550          16 :     insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
    1551          32 :         getLastDebugLocInBB(ContingMBB));
    1552             :   }
    1553          16 : }
    1554             : 
    1555           8 : int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
    1556             :     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
    1557           8 :   int Cloned = 0;
    1558             :   assert(PreMBB->isSuccessor(SrcMBB));
    1559          28 :   while (SrcMBB && SrcMBB != DstMBB) {
    1560             :     assert(SrcMBB->succ_size() == 1);
    1561          10 :     if (SrcMBB->pred_size() > 1) {
    1562           0 :       SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
    1563           0 :       ++Cloned;
    1564             :     }
    1565             : 
    1566          10 :     PreMBB = SrcMBB;
    1567          10 :     SrcMBB = *SrcMBB->succ_begin();
    1568             :   }
    1569             : 
    1570           8 :   return Cloned;
    1571             : }
    1572             : 
    1573             : MachineBasicBlock *
    1574           1 : AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
    1575             :     MachineBasicBlock *PredMBB) {
    1576             :   assert(PredMBB->isSuccessor(MBB) &&
    1577             :          "succBlk is not a prececessor of curBlk");
    1578             : 
    1579           1 :   MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
    1580           1 :   replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
    1581             :   //srcBlk, oldBlk, newBlk
    1582             : 
    1583           1 :   PredMBB->replaceSuccessor(MBB, CloneMBB);
    1584             : 
    1585             :   // add all successor to cloneBlk
    1586           1 :   cloneSuccessorList(CloneMBB, MBB);
    1587             : 
    1588           1 :   numClonedInstr += MBB->size();
    1589             : 
    1590             :   DEBUG(
    1591             :     dbgs() << "Cloned block: " << "BB"
    1592             :            << MBB->getNumber() << "size " << MBB->size() << "\n";
    1593             :   );
    1594             : 
    1595             :   SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
    1596             : 
    1597           1 :   return CloneMBB;
    1598             : }
    1599             : 
    1600           0 : void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
    1601             :     MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
    1602           0 :   MachineBasicBlock::iterator SpliceEnd;
    1603             :   //look for the input branchinstr, not the AMDGPU branchinstr
    1604           0 :   MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
    1605           0 :   if (!BranchMI) {
    1606             :     DEBUG(
    1607             :       dbgs() << "migrateInstruction don't see branch instr\n";
    1608             :     );
    1609           0 :     SpliceEnd = SrcMBB->end();
    1610             :   } else {
    1611             :     DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
    1612             :     SpliceEnd = BranchMI;
    1613             :   }
    1614             :   DEBUG(
    1615             :     dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
    1616             :       << "srcSize = " << SrcMBB->size() << "\n";
    1617             :   );
    1618             : 
    1619             :   //splice insert before insertPos
    1620           0 :   DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
    1621             : 
    1622             :   DEBUG(
    1623             :     dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
    1624             :       << "srcSize = " << SrcMBB->size() << '\n';
    1625             :   );
    1626           0 : }
    1627             : 
    1628             : MachineBasicBlock *
    1629           0 : AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
    1630           0 :   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
    1631           0 :   MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
    1632             : 
    1633           0 :   if (!LoopHeader || !LoopLatch)
    1634             :     return nullptr;
    1635           0 :   MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
    1636             :   // Is LoopRep an infinite loop ?
    1637           0 :   if (!BranchMI || !isUncondBranch(BranchMI))
    1638             :     return nullptr;
    1639             : 
    1640           0 :   MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
    1641           0 :   FuncRep->push_back(DummyExitBlk);  //insert to function
    1642             :   SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
    1643             :   DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
    1644           0 :   LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
    1645           0 :   Ctx.emitError("Extra register needed to handle CFG");
    1646           0 :   return nullptr;
    1647             : }
    1648             : 
    1649        2174 : void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
    1650             :   MachineInstr *BranchMI;
    1651             : 
    1652             :   // I saw two unconditional branch in one basic block in example
    1653             :   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
    1654           1 :   while ((BranchMI = getLoopendBlockBranchInstr(MBB))
    1655        2176 :           && isUncondBranch(BranchMI)) {
    1656             :     DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
    1657           1 :     BranchMI->eraseFromParent();
    1658             :   }
    1659        2174 : }
    1660             : 
    1661        2174 : void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
    1662             :     MachineBasicBlock *MBB) {
    1663        2174 :   if (MBB->succ_size() != 2)
    1664             :     return;
    1665          58 :   MachineBasicBlock *MBB1 = *MBB->succ_begin();
    1666         116 :   MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
    1667          58 :   if (MBB1 != MBB2)
    1668             :     return;
    1669             : 
    1670           0 :   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
    1671             :   assert(BranchMI && isCondBranch(BranchMI));
    1672             :   DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
    1673           0 :   BranchMI->eraseFromParent();
    1674             :   SHOWNEWBLK(MBB1, "Removing redundant successor");
    1675           0 :   MBB->removeSuccessor(MBB1, true);
    1676             : }
    1677             : 
    1678           4 : void AMDGPUCFGStructurizer::addDummyExitBlock(
    1679             :     SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
    1680           4 :   MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
    1681           8 :   FuncRep->push_back(DummyExitBlk);  //insert to function
    1682          12 :   insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
    1683             : 
    1684          16 :   for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
    1685           8 :        E = RetMBB.end(); It != E; ++It) {
    1686           8 :     MachineBasicBlock *MBB = *It;
    1687           8 :     MachineInstr *MI = getReturnInstr(MBB);
    1688           8 :     if (MI)
    1689           7 :       MI->eraseFromParent();
    1690           8 :     MBB->addSuccessor(DummyExitBlk);
    1691             :     DEBUG(
    1692             :       dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
    1693             :              << " successors\n";
    1694             :     );
    1695             :   }
    1696             :   SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
    1697           4 : }
    1698             : 
    1699          74 : void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
    1700          93 :   while (MBB->succ_size())
    1701          19 :     MBB->removeSuccessor(*MBB->succ_begin());
    1702          74 : }
    1703             : 
    1704        2174 : void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
    1705             :     int SccNum) {
    1706        2174 :   BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
    1707        2174 :   if (!srcBlkInfo)
    1708        4348 :     srcBlkInfo = new BlockInformation();
    1709        2174 :   srcBlkInfo->SccNum = SccNum;
    1710        2174 : }
    1711             : 
    1712         122 : void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
    1713             :   DEBUG(
    1714             :         dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
    1715             :   );
    1716             : 
    1717         122 :   BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
    1718             : 
    1719         122 :   if (!SrcBlkInfo)
    1720          10 :     SrcBlkInfo = new BlockInformation();
    1721             : 
    1722         122 :   SrcBlkInfo->IsRetired = true;
    1723             :   assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
    1724             :          && "can't retire block yet");
    1725         122 : }
    1726             : 
    1727         244 : INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
    1728             :                       "AMDGPU CFG Structurizer", false, false)
    1729         244 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    1730         244 : INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
    1731         244 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    1732        1220 : INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
    1733             :                       "AMDGPU CFG Structurizer", false, false)
    1734             : 
    1735         244 : FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
    1736         244 :   return new AMDGPUCFGStructurizer();
    1737             : }

Generated by: LCOV version 1.13