LLVM  3.7.0
AMDILCFGStructurizer.cpp
Go to the documentation of this file.
1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //==-----------------------------------------------------------------------===//
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUInstrInfo.h"
13 #include "AMDGPUSubtarget.h"
14 #include "R600InstrInfo.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/Support/Debug.h"
33 #include <deque>
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "structcfg"
38 
39 #define DEFAULT_VEC_SLOTS 8
40 
41 // TODO: move-begin.
42 
43 //===----------------------------------------------------------------------===//
44 //
45 // Statistics for CFGStructurizer.
46 //
47 //===----------------------------------------------------------------------===//
48 
49 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
50  "matched");
51 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
52  "matched");
53 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
54  "pattern matched");
55 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
56 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
57 
58 namespace llvm {
60 }
61 
62 //===----------------------------------------------------------------------===//
63 //
64 // Miscellaneous utility for CFGStructurizer.
65 //
66 //===----------------------------------------------------------------------===//
67 namespace {
68 #define SHOWNEWINSTR(i) \
69  DEBUG(dbgs() << "New instr: " << *i << "\n");
70 
71 #define SHOWNEWBLK(b, msg) \
72 DEBUG( \
73  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
74  dbgs() << "\n"; \
75 );
76 
77 #define SHOWBLK_DETAIL(b, msg) \
78 DEBUG( \
79  if (b) { \
80  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
81  b->print(dbgs()); \
82  dbgs() << "\n"; \
83  } \
84 );
85 
86 #define INVALIDSCCNUM -1
87 
88 template<class NodeT>
89 void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
90  size_t sz = Src.size();
91  for (size_t i = 0; i < sz/2; ++i) {
92  NodeT *t = Src[i];
93  Src[i] = Src[sz - i - 1];
94  Src[sz - i - 1] = t;
95  }
96 }
97 
98 } // end anonymous namespace
99 
100 //===----------------------------------------------------------------------===//
101 //
102 // supporting data structure for CFGStructurizer
103 //
104 //===----------------------------------------------------------------------===//
105 
106 
107 namespace {
108 
109 class BlockInformation {
110 public:
111  bool IsRetired;
112  int SccNum;
113  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
114 };
115 
116 } // end anonymous namespace
117 
118 //===----------------------------------------------------------------------===//
119 //
120 // CFGStructurizer
121 //
122 //===----------------------------------------------------------------------===//
123 
124 namespace {
125 class AMDGPUCFGStructurizer : public MachineFunctionPass {
126 public:
127  typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
128  typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
129  typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
130 
131  enum PathToKind {
132  Not_SinglePath = 0,
133  SinglePath_InPath = 1,
134  SinglePath_NotInPath = 2
135  };
136 
137  static char ID;
138 
139  AMDGPUCFGStructurizer() :
140  MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
142  }
143 
144  const char *getPassName() const override {
145  return "AMDGPU Control Flow Graph structurizer Pass";
146  }
147 
148  void getAnalysisUsage(AnalysisUsage &AU) const override {
154  }
155 
156  /// Perform the CFG structurization
157  bool run();
158 
159  /// Perform the CFG preparation
160  /// This step will remove every unconditionnal/dead jump instructions and make
161  /// sure all loops have an exit block
162  bool prepare();
163 
164  bool runOnMachineFunction(MachineFunction &MF) override {
165  TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
166  TRI = &TII->getRegisterInfo();
167  DEBUG(MF.dump(););
168  OrderedBlks.clear();
169  Visited.clear();
170  FuncRep = &MF;
171  MLI = &getAnalysis<MachineLoopInfo>();
172  DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
173  MDT = &getAnalysis<MachineDominatorTree>();
174  DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
175  PDT = &getAnalysis<MachinePostDominatorTree>();
176  DEBUG(PDT->print(dbgs()););
177  prepare();
178  run();
179  DEBUG(MF.dump(););
180  return true;
181  }
182 
183 protected:
186  MachineLoopInfo *MLI;
187  const R600InstrInfo *TII;
188  const AMDGPURegisterInfo *TRI;
189 
190  // PRINT FUNCTIONS
191  /// Print the ordered Blocks.
192  void printOrderedBlocks() const {
193  size_t i = 0;
194  for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
195  iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
196  dbgs() << "BB" << (*iterBlk)->getNumber();
197  dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
198  if (i != 0 && i % 10 == 0) {
199  dbgs() << "\n";
200  } else {
201  dbgs() << " ";
202  }
203  }
204  }
205  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
206  for (MachineLoop::iterator iter = LoopInfo.begin(),
207  iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
208  (*iter)->print(dbgs(), 0);
209  }
210  }
211 
212  // UTILITY FUNCTIONS
213  int getSCCNum(MachineBasicBlock *MBB) const;
214  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
215  bool hasBackEdge(MachineBasicBlock *MBB) const;
216  static unsigned getLoopDepth(MachineLoop *LoopRep);
217  bool isRetiredBlock(MachineBasicBlock *MBB) const;
218  bool isActiveLoophead(MachineBasicBlock *MBB) const;
219  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
220  bool AllowSideEntry = true) const;
221  int countActiveBlock(MBBVector::const_iterator It,
222  MBBVector::const_iterator E) const;
223  bool needMigrateBlock(MachineBasicBlock *MBB) const;
224 
225  // Utility Functions
226  void reversePredicateSetter(MachineBasicBlock::iterator I);
227  /// Compute the reversed DFS post order of Blocks
228  void orderBlocks(MachineFunction *MF);
229 
230  // Function originally from CFGStructTraits
231  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
232  DebugLoc DL = DebugLoc());
233  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
234  DebugLoc DL = DebugLoc());
235  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
236  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
237  DebugLoc DL);
238  void insertCondBranchBefore(MachineBasicBlock *MBB,
239  MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
240  DebugLoc DL);
241  void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
242  static int getBranchNzeroOpcode(int OldOpcode);
243  static int getBranchZeroOpcode(int OldOpcode);
244  static int getContinueNzeroOpcode(int OldOpcode);
245  static int getContinueZeroOpcode(int OldOpcode);
246  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
247  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
248  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
249  MachineInstr *MI);
250  static bool isCondBranch(MachineInstr *MI);
251  static bool isUncondBranch(MachineInstr *MI);
252  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
253  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
254  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
255  ///
256  /// BB with backward-edge could have move instructions after the branch
257  /// instruction. Such move instruction "belong to" the loop backward-edge.
258  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
259  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
260  static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
261  static bool isReturnBlock(MachineBasicBlock *MBB);
262  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
263  MachineBasicBlock *SrcMBB) ;
264  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
265  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
266  /// because the AMDGPU instruction is not recognized as terminator fix this
267  /// and retire this routine
268  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
269  MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
270  static void wrapup(MachineBasicBlock *MBB);
271 
272 
273  int patternMatch(MachineBasicBlock *MBB);
274  int patternMatchGroup(MachineBasicBlock *MBB);
275  int serialPatternMatch(MachineBasicBlock *MBB);
276  int ifPatternMatch(MachineBasicBlock *MBB);
277  int loopendPatternMatch();
278  int mergeLoop(MachineLoop *LoopRep);
279  int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
280 
281  void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
282  MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
283  MachineLoop *ContLoop);
284  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
285  /// the same loop with LoopLandInfo without explicitly keeping track of
286  /// loopContBlks and loopBreakBlks, this is a method to get the information.
287  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
288  MachineBasicBlock *Src2MBB);
289  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
290  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
291  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
292  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
293  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
294  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
295  MachineBasicBlock **LandMBBPtr);
296  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
297  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
298  MachineBasicBlock *LandMBB, bool Detail = false);
299  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
300  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
301  void mergeSerialBlock(MachineBasicBlock *DstMBB,
302  MachineBasicBlock *SrcMBB);
303 
304  void mergeIfthenelseBlock(MachineInstr *BranchMI,
305  MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
306  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
307  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
308  MachineBasicBlock *LandMBB);
309  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
310  MachineBasicBlock *LandMBB);
311  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
312  MachineBasicBlock *ContMBB);
313  /// normalizeInfiniteLoopExit change
314  /// B1:
315  /// uncond_br LoopHeader
316  ///
317  /// to
318  /// B1:
319  /// cond_br 1 LoopHeader dummyExit
320  /// and return the newly added dummy exit block
321  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
322  void removeUnconditionalBranch(MachineBasicBlock *MBB);
323  /// Remove duplicate branches instructions in a block.
324  /// For instance
325  /// B0:
326  /// cond_br X B1 B2
327  /// cond_br X B1 B2
328  /// is transformed to
329  /// B0:
330  /// cond_br X B1 B2
331  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
332  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
333  void removeSuccessor(MachineBasicBlock *MBB);
334  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
335  MachineBasicBlock *PredMBB);
336  void migrateInstruction(MachineBasicBlock *SrcMBB,
338  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
339  void retireBlock(MachineBasicBlock *MBB);
340  void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
341 
342  MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
343  /// This is work around solution for findNearestCommonDominator not available
344  /// to post dom a proper fix should go to Dominators.h.
345  MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
346  MachineBasicBlock *MBB2);
347 
348 private:
349  MBBInfoMap BlockInfoMap;
350  LoopLandInfoMap LLInfoMap;
351  std::map<MachineLoop *, bool> Visited;
352  MachineFunction *FuncRep;
354 };
355 
356 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
357  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
358  if (It == BlockInfoMap.end())
359  return INVALIDSCCNUM;
360  return (*It).second->SccNum;
361 }
362 
363 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
364  const {
365  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
366  if (It == LLInfoMap.end())
367  return nullptr;
368  return (*It).second;
369 }
370 
371 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
372  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
373  if (!LoopRep)
374  return false;
375  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
376  return MBB->isSuccessor(LoopHeader);
377 }
378 
379 unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
380  return LoopRep ? LoopRep->getLoopDepth() : 0;
381 }
382 
383 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
384  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
385  if (It == BlockInfoMap.end())
386  return false;
387  return (*It).second->IsRetired;
388 }
389 
390 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
391  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
392  while (LoopRep && LoopRep->getHeader() == MBB) {
393  MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
394  if(!LoopLand)
395  return true;
396  if (!isRetiredBlock(LoopLand))
397  return true;
398  LoopRep = LoopRep->getParentLoop();
399  }
400  return false;
401 }
402 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
403  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
404  bool AllowSideEntry) const {
405  assert(DstMBB);
406  if (SrcMBB == DstMBB)
407  return SinglePath_InPath;
408  while (SrcMBB && SrcMBB->succ_size() == 1) {
409  SrcMBB = *SrcMBB->succ_begin();
410  if (SrcMBB == DstMBB)
411  return SinglePath_InPath;
412  if (!AllowSideEntry && SrcMBB->pred_size() > 1)
413  return Not_SinglePath;
414  }
415  if (SrcMBB && SrcMBB->succ_size()==0)
416  return SinglePath_NotInPath;
417  return Not_SinglePath;
418 }
419 
420 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
421  MBBVector::const_iterator E) const {
422  int Count = 0;
423  while (It != E) {
424  if (!isRetiredBlock(*It))
425  ++Count;
426  ++It;
427  }
428  return Count;
429 }
430 
431 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
432  unsigned BlockSizeThreshold = 30;
433  unsigned CloneInstrThreshold = 100;
434  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
435 
436  if(!MultiplePreds)
437  return false;
438  unsigned BlkSize = MBB->size();
439  return ((BlkSize > BlockSizeThreshold) &&
440  (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
441 }
442 
443 void AMDGPUCFGStructurizer::reversePredicateSetter(
445  while (I--) {
446  if (I->getOpcode() == AMDGPU::PRED_X) {
447  switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
448  case OPCODE_IS_ZERO_INT:
449  static_cast<MachineInstr *>(I)->getOperand(2)
450  .setImm(OPCODE_IS_NOT_ZERO_INT);
451  return;
453  static_cast<MachineInstr *>(I)->getOperand(2)
454  .setImm(OPCODE_IS_ZERO_INT);
455  return;
456  case OPCODE_IS_ZERO:
457  static_cast<MachineInstr *>(I)->getOperand(2)
458  .setImm(OPCODE_IS_NOT_ZERO);
459  return;
460  case OPCODE_IS_NOT_ZERO:
461  static_cast<MachineInstr *>(I)->getOperand(2)
462  .setImm(OPCODE_IS_ZERO);
463  return;
464  default:
465  llvm_unreachable("PRED_X Opcode invalid!");
466  }
467  }
468  }
469 }
470 
471 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
472  int NewOpcode, DebugLoc DL) {
473  MachineInstr *MI = MBB->getParent()
474  ->CreateMachineInstr(TII->get(NewOpcode), DL);
475  MBB->push_back(MI);
476  //assume the instruction doesn't take any reg operand ...
477  SHOWNEWINSTR(MI);
478 }
479 
480 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
481  int NewOpcode, DebugLoc DL) {
482  MachineInstr *MI =
483  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
484  if (MBB->begin() != MBB->end())
485  MBB->insert(MBB->begin(), MI);
486  else
487  MBB->push_back(MI);
488  SHOWNEWINSTR(MI);
489  return MI;
490 }
491 
492 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
493  MachineBasicBlock::iterator I, int NewOpcode) {
494  MachineInstr *OldMI = &(*I);
495  MachineBasicBlock *MBB = OldMI->getParent();
496  MachineInstr *NewMBB =
497  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
498  MBB->insert(I, NewMBB);
499  //assume the instruction doesn't take any reg operand ...
500  SHOWNEWINSTR(NewMBB);
501  return NewMBB;
502 }
503 
504 void AMDGPUCFGStructurizer::insertCondBranchBefore(
505  MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
506  MachineInstr *OldMI = &(*I);
507  MachineBasicBlock *MBB = OldMI->getParent();
508  MachineFunction *MF = MBB->getParent();
509  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
510  MBB->insert(I, NewMI);
511  MachineInstrBuilder MIB(*MF, NewMI);
512  MIB.addReg(OldMI->getOperand(1).getReg(), false);
513  SHOWNEWINSTR(NewMI);
514  //erase later oldInstr->eraseFromParent();
515 }
516 
517 void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
518  MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
519  DebugLoc DL) {
520  MachineFunction *MF = blk->getParent();
521  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
522  //insert before
523  blk->insert(I, NewInstr);
524  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
525  SHOWNEWINSTR(NewInstr);
526 }
527 
528 void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
529  int NewOpcode, int RegNum) {
530  MachineFunction *MF = MBB->getParent();
531  MachineInstr *NewInstr =
532  MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
533  MBB->push_back(NewInstr);
534  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
535  SHOWNEWINSTR(NewInstr);
536 }
537 
538 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
539  switch(OldOpcode) {
540  case AMDGPU::JUMP_COND:
541  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
542  case AMDGPU::BRANCH_COND_i32:
543  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
544  default: llvm_unreachable("internal error");
545  }
546  return -1;
547 }
548 
549 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
550  switch(OldOpcode) {
551  case AMDGPU::JUMP_COND:
552  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
553  case AMDGPU::BRANCH_COND_i32:
554  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
555  default: llvm_unreachable("internal error");
556  }
557  return -1;
558 }
559 
560 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
561  switch(OldOpcode) {
562  case AMDGPU::JUMP_COND:
563  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
564  default: llvm_unreachable("internal error");
565  };
566  return -1;
567 }
568 
569 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
570  switch(OldOpcode) {
571  case AMDGPU::JUMP_COND:
572  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
573  default: llvm_unreachable("internal error");
574  }
575  return -1;
576 }
577 
578 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
579  return MI->getOperand(0).getMBB();
580 }
581 
582 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
583  MachineBasicBlock *MBB) {
584  MI->getOperand(0).setMBB(MBB);
585 }
586 
588 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
589  MachineInstr *MI) {
590  assert(MBB->succ_size() == 2);
591  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
594  ++Next;
595  return (*It == TrueBranch) ? *Next : *It;
596 }
597 
598 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
599  switch (MI->getOpcode()) {
600  case AMDGPU::JUMP_COND:
601  case AMDGPU::BRANCH_COND_i32:
602  case AMDGPU::BRANCH_COND_f32: return true;
603  default:
604  return false;
605  }
606  return false;
607 }
608 
609 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
610  switch (MI->getOpcode()) {
611  case AMDGPU::JUMP:
612  case AMDGPU::BRANCH:
613  return true;
614  default:
615  return false;
616  }
617  return false;
618 }
619 
620 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
621  //get DebugLoc from the first MachineBasicBlock instruction with debug info
622  DebugLoc DL;
623  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
624  ++It) {
625  MachineInstr *instr = &(*It);
626  if (instr->getDebugLoc())
627  DL = instr->getDebugLoc();
628  }
629  return DL;
630 }
631 
632 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
633  MachineBasicBlock *MBB) {
635  MachineInstr *MI = &*It;
636  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
637  return MI;
638  return nullptr;
639 }
640 
641 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
642  MachineBasicBlock *MBB) {
643  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
644  It != E; ++It) {
645  // FIXME: Simplify
646  MachineInstr *MI = &*It;
647  if (MI) {
648  if (isCondBranch(MI) || isUncondBranch(MI))
649  return MI;
650  else if (!TII->isMov(MI->getOpcode()))
651  break;
652  }
653  }
654  return nullptr;
655 }
656 
657 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
659  if (It != MBB->rend()) {
660  MachineInstr *instr = &(*It);
661  if (instr->getOpcode() == AMDGPU::RETURN)
662  return instr;
663  }
664  return nullptr;
665 }
666 
667 MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
669  if (It != MBB->rend()) {
670  MachineInstr *MI = &(*It);
671  if (MI->getOpcode() == AMDGPU::CONTINUE)
672  return MI;
673  }
674  return nullptr;
675 }
676 
677 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
678  MachineInstr *MI = getReturnInstr(MBB);
679  bool IsReturn = (MBB->succ_size() == 0);
680  if (MI)
681  assert(IsReturn);
682  else if (IsReturn)
683  DEBUG(
684  dbgs() << "BB" << MBB->getNumber()
685  <<" is return block without RETURN instr\n";);
686  return IsReturn;
687 }
688 
689 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
690  MachineBasicBlock *SrcMBB) {
691  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
692  iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
693  DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
694 }
695 
696 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
697  MachineFunction *Func = MBB->getParent();
698  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
699  Func->push_back(NewMBB); //insert to function
700  for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
701  It != E; ++It) {
702  MachineInstr *MI = Func->CloneMachineInstr(It);
703  NewMBB->push_back(MI);
704  }
705  return NewMBB;
706 }
707 
708 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
709  MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
710  MachineBasicBlock *NewBlk) {
711  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
712  if (BranchMI && isCondBranch(BranchMI) &&
713  getTrueBranch(BranchMI) == OldMBB)
714  setTrueBranch(BranchMI, NewBlk);
715 }
716 
717 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
718  assert((!MBB->getParent()->getJumpTableInfo()
719  || MBB->getParent()->getJumpTableInfo()->isEmpty())
720  && "found a jump table");
721 
722  //collect continue right before endloop
724  MachineBasicBlock::iterator Pre = MBB->begin();
725  MachineBasicBlock::iterator E = MBB->end();
727  while (It != E) {
728  if (Pre->getOpcode() == AMDGPU::CONTINUE
729  && It->getOpcode() == AMDGPU::ENDLOOP)
730  ContInstr.push_back(Pre);
731  Pre = It;
732  ++It;
733  }
734 
735  //delete continue right before endloop
736  for (unsigned i = 0; i < ContInstr.size(); ++i)
737  ContInstr[i]->eraseFromParent();
738 
739  // TODO to fix up jump table so later phase won't be confused. if
740  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
741  // there isn't such an interface yet. alternatively, replace all the other
742  // blocks in the jump table with the entryBlk //}
743 
744 }
745 
746 
747 bool AMDGPUCFGStructurizer::prepare() {
748  bool Changed = false;
749 
750  //FIXME: if not reducible flow graph, make it so ???
751 
752  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
753 
754  orderBlocks(FuncRep);
755 
757 
758  // Add an ExitBlk to loop that don't have one
759  for (MachineLoopInfo::iterator It = MLI->begin(),
760  E = MLI->end(); It != E; ++It) {
761  MachineLoop *LoopRep = (*It);
762  MBBVector ExitingMBBs;
763  LoopRep->getExitingBlocks(ExitingMBBs);
764 
765  if (ExitingMBBs.size() == 0) {
766  MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
767  if (DummyExitBlk)
768  RetBlks.push_back(DummyExitBlk);
769  }
770  }
771 
772  // Remove unconditional branch instr.
773  // Add dummy exit block iff there are multiple returns.
775  It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
776  MachineBasicBlock *MBB = *It;
777  removeUnconditionalBranch(MBB);
778  removeRedundantConditionalBranch(MBB);
779  if (isReturnBlock(MBB)) {
780  RetBlks.push_back(MBB);
781  }
782  assert(MBB->succ_size() <= 2);
783  }
784 
785  if (RetBlks.size() >= 2) {
786  addDummyExitBlock(RetBlks);
787  Changed = true;
788  }
789 
790  return Changed;
791 }
792 
793 bool AMDGPUCFGStructurizer::run() {
794 
795  //Assume reducible CFG...
796  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
797 
798 #ifdef STRESSTEST
799  //Use the worse block ordering to test the algorithm.
800  ReverseVector(orderedBlks);
801 #endif
802 
803  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
804  int NumIter = 0;
805  bool Finish = false;
806  MachineBasicBlock *MBB;
807  bool MakeProgress = false;
808  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
809  OrderedBlks.end());
810 
811  do {
812  ++NumIter;
813  DEBUG(
814  dbgs() << "numIter = " << NumIter
815  << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
816  );
817 
819  OrderedBlks.begin();
821  OrderedBlks.end();
822 
824  It;
825  MachineBasicBlock *SccBeginMBB = nullptr;
826  int SccNumBlk = 0; // The number of active blocks, init to a
827  // maximum possible number.
828  int SccNumIter; // Number of iteration in this SCC.
829 
830  while (It != E) {
831  MBB = *It;
832 
833  if (!SccBeginMBB) {
834  SccBeginIter = It;
835  SccBeginMBB = MBB;
836  SccNumIter = 0;
837  SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
838  DEBUG(
839  dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
840  dbgs() << "\n";
841  );
842  }
843 
844  if (!isRetiredBlock(MBB))
845  patternMatch(MBB);
846 
847  ++It;
848 
849  bool ContNextScc = true;
850  if (It == E
851  || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
852  // Just finish one scc.
853  ++SccNumIter;
854  int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
855  if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
856  DEBUG(
857  dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
858  << ", sccNumIter = " << SccNumIter;
859  dbgs() << "doesn't make any progress\n";
860  );
861  ContNextScc = true;
862  } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
863  SccNumBlk = sccRemainedNumBlk;
864  It = SccBeginIter;
865  ContNextScc = false;
866  DEBUG(
867  dbgs() << "repeat processing SCC" << getSCCNum(MBB)
868  << "sccNumIter = " << SccNumIter << '\n';
869  );
870  } else {
871  // Finish the current scc.
872  ContNextScc = true;
873  }
874  } else {
875  // Continue on next component in the current scc.
876  ContNextScc = false;
877  }
878 
879  if (ContNextScc)
880  SccBeginMBB = nullptr;
881  } //while, "one iteration" over the function.
882 
883  MachineBasicBlock *EntryMBB =
885  if (EntryMBB->succ_size() == 0) {
886  Finish = true;
887  DEBUG(
888  dbgs() << "Reduce to one block\n";
889  );
890  } else {
891  int NewnumRemainedBlk
892  = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
893  // consider cloned blocks ??
894  if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
895  MakeProgress = true;
896  NumRemainedBlk = NewnumRemainedBlk;
897  } else {
898  MakeProgress = false;
899  DEBUG(
900  dbgs() << "No progress\n";
901  );
902  }
903  }
904  } while (!Finish && MakeProgress);
905 
906  // Misc wrap up to maintain the consistency of the Function representation.
908 
909  // Detach retired Block, release memory.
910  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
911  It != E; ++It) {
912  if ((*It).second && (*It).second->IsRetired) {
913  assert(((*It).first)->getNumber() != -1);
914  DEBUG(
915  dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
916  );
917  (*It).first->eraseFromParent(); //Remove from the parent Function.
918  }
919  delete (*It).second;
920  }
921  BlockInfoMap.clear();
922  LLInfoMap.clear();
923 
924  if (!Finish) {
925  DEBUG(FuncRep->viewCFG());
926  llvm_unreachable("IRREDUCIBLE_CFG");
927  }
928 
929  return true;
930 }
931 
932 
933 
934 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
935  int SccNum = 0;
936  MachineBasicBlock *MBB;
937  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
938  ++It, ++SccNum) {
939  const std::vector<MachineBasicBlock *> &SccNext = *It;
940  for (std::vector<MachineBasicBlock *>::const_iterator
941  blockIter = SccNext.begin(), blockEnd = SccNext.end();
942  blockIter != blockEnd; ++blockIter) {
943  MBB = *blockIter;
944  OrderedBlks.push_back(MBB);
945  recordSccnum(MBB, SccNum);
946  }
947  }
948 
949  //walk through all the block in func to check for unreachable
950  typedef GraphTraits<MachineFunction *> GTM;
951  MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
952  for (; It != E; ++It) {
953  MachineBasicBlock *MBB = &(*It);
954  SccNum = getSCCNum(MBB);
955  if (SccNum == INVALIDSCCNUM)
956  dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
957  }
958 }
959 
960 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
961  int NumMatch = 0;
962  int CurMatch;
963 
964  DEBUG(
965  dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
966  );
967 
968  while ((CurMatch = patternMatchGroup(MBB)) > 0)
969  NumMatch += CurMatch;
970 
971  DEBUG(
972  dbgs() << "End patternMatch BB" << MBB->getNumber()
973  << ", numMatch = " << NumMatch << "\n";
974  );
975 
976  return NumMatch;
977 }
978 
979 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
980  int NumMatch = 0;
981  NumMatch += loopendPatternMatch();
982  NumMatch += serialPatternMatch(MBB);
983  NumMatch += ifPatternMatch(MBB);
984  return NumMatch;
985 }
986 
987 
988 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
989  if (MBB->succ_size() != 1)
990  return 0;
991 
992  MachineBasicBlock *childBlk = *MBB->succ_begin();
993  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
994  return 0;
995 
996  mergeSerialBlock(MBB, childBlk);
997  ++numSerialPatternMatch;
998  return 1;
999 }
1000 
1001 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
1002  //two edges
1003  if (MBB->succ_size() != 2)
1004  return 0;
1005  if (hasBackEdge(MBB))
1006  return 0;
1007  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1008  if (!BranchMI)
1009  return 0;
1010 
1011  assert(isCondBranch(BranchMI));
1012  int NumMatch = 0;
1013 
1014  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
1015  NumMatch += serialPatternMatch(TrueMBB);
1016  NumMatch += ifPatternMatch(TrueMBB);
1017  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
1018  NumMatch += serialPatternMatch(FalseMBB);
1019  NumMatch += ifPatternMatch(FalseMBB);
1020  MachineBasicBlock *LandBlk;
1021  int Cloned = 0;
1022 
1023  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
1024  // TODO: Simplify
1025  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
1026  && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
1027  // Diamond pattern
1028  LandBlk = *TrueMBB->succ_begin();
1029  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
1030  // Triangle pattern, false is empty
1031  LandBlk = FalseMBB;
1032  FalseMBB = nullptr;
1033  } else if (FalseMBB->succ_size() == 1
1034  && *FalseMBB->succ_begin() == TrueMBB) {
1035  // Triangle pattern, true is empty
1036  // We reverse the predicate to make a triangle, empty false pattern;
1037  std::swap(TrueMBB, FalseMBB);
1038  reversePredicateSetter(MBB->end());
1039  LandBlk = FalseMBB;
1040  FalseMBB = nullptr;
1041  } else if (FalseMBB->succ_size() == 1
1042  && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
1043  LandBlk = *FalseMBB->succ_begin();
1044  } else if (TrueMBB->succ_size() == 1
1045  && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
1046  LandBlk = *TrueMBB->succ_begin();
1047  } else {
1048  return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1049  }
1050 
1051  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
1052  // new BB created for landBlk==NULL may introduce new challenge to the
1053  // reduction process.
1054  if (LandBlk &&
1055  ((TrueMBB && TrueMBB->pred_size() > 1)
1056  || (FalseMBB && FalseMBB->pred_size() > 1))) {
1057  Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1058  }
1059 
1060  if (TrueMBB && TrueMBB->pred_size() > 1) {
1061  TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1062  ++Cloned;
1063  }
1064 
1065  if (FalseMBB && FalseMBB->pred_size() > 1) {
1066  FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1067  ++Cloned;
1068  }
1069 
1070  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1071 
1072  ++numIfPatternMatch;
1073 
1074  numClonedBlock += Cloned;
1075 
1076  return 1 + Cloned + NumMatch;
1077 }
1078 
1079 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1080  std::deque<MachineLoop *> NestedLoops;
1081  for (auto &It: *MLI)
1082  for (MachineLoop *ML : depth_first(It))
1083  NestedLoops.push_front(ML);
1084 
1085  if (NestedLoops.size() == 0)
1086  return 0;
1087 
1088  // Process nested loop outside->inside (we did push_front),
1089  // so "continue" to a outside loop won't be mistaken as "break"
1090  // of the current loop.
1091  int Num = 0;
1092  for (MachineLoop *ExaminedLoop : NestedLoops) {
1093  if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1094  continue;
1095  DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1096  int NumBreak = mergeLoop(ExaminedLoop);
1097  if (NumBreak == -1)
1098  break;
1099  Num += NumBreak;
1100  }
1101  return Num;
1102 }
1103 
1104 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1105  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1106  MBBVector ExitingMBBs;
1107  LoopRep->getExitingBlocks(ExitingMBBs);
1108  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1109  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
1110  // We assume a single ExitBlk
1111  MBBVector ExitBlks;
1112  LoopRep->getExitBlocks(ExitBlks);
1114  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1115  ExitBlkSet.insert(ExitBlks[i]);
1116  assert(ExitBlkSet.size() == 1);
1117  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1118  assert(ExitBlk && "Loop has several exit block");
1119  MBBVector LatchBlks;
1120  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
1121  InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
1122  PE = InvMBBTraits::child_end(LoopHeader);
1123  for (; PI != PE; PI++) {
1124  if (LoopRep->contains(*PI))
1125  LatchBlks.push_back(*PI);
1126  }
1127 
1128  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1129  mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1130  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1131  settleLoopcontBlock(LatchBlks[i], LoopHeader);
1132  int Match = 0;
1133  do {
1134  Match = 0;
1135  Match += serialPatternMatch(LoopHeader);
1136  Match += ifPatternMatch(LoopHeader);
1137  } while (Match > 0);
1138  mergeLooplandBlock(LoopHeader, ExitBlk);
1139  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1140  if (ParentLoop)
1141  MLI->changeLoopFor(LoopHeader, ParentLoop);
1142  else
1143  MLI->removeBlock(LoopHeader);
1144  Visited[LoopRep] = true;
1145  return 1;
1146 }
1147 
1148 int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
1149  MachineBasicBlock *LoopHeader) {
1150  int NumCont = 0;
1153  GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
1154  E = GTIM::child_end(LoopHeader);
1155  for (; It != E; ++It) {
1156  MachineBasicBlock *MBB = *It;
1157  if (LoopRep->contains(MBB)) {
1158  handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
1159  LoopHeader, LoopRep);
1160  ContMBB.push_back(MBB);
1161  ++NumCont;
1162  }
1163  }
1164 
1166  E = ContMBB.end(); It != E; ++It) {
1167  (*It)->removeSuccessor(LoopHeader);
1168  }
1169 
1170  numLoopcontPatternMatch += NumCont;
1171 
1172  return NumCont;
1173 }
1174 
1175 
1176 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1177  MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1178  if (Src1MBB->succ_size() == 0) {
1179  MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1180  if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1181  MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1182  if (TheEntry) {
1183  DEBUG(
1184  dbgs() << "isLoopContBreakBlock yes src1 = BB"
1185  << Src1MBB->getNumber()
1186  << " src2 = BB" << Src2MBB->getNumber() << "\n";
1187  );
1188  return true;
1189  }
1190  }
1191  }
1192  return false;
1193 }
1194 
1195 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1196  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1197  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1198  if (Num == 0) {
1199  DEBUG(
1200  dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1201  );
1202  Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1203  }
1204  return Num;
1205 }
1206 
1207 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1208  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1209  int Num = 0;
1210  MachineBasicBlock *DownBlk;
1211 
1212  //trueBlk could be the common post dominator
1213  DownBlk = TrueMBB;
1214 
1215  DEBUG(
1216  dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1217  << " true = BB" << TrueMBB->getNumber()
1218  << ", numSucc=" << TrueMBB->succ_size()
1219  << " false = BB" << FalseMBB->getNumber() << "\n";
1220  );
1221 
1222  while (DownBlk) {
1223  DEBUG(
1224  dbgs() << "check down = BB" << DownBlk->getNumber();
1225  );
1226 
1227  if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1228  DEBUG(
1229  dbgs() << " working\n";
1230  );
1231 
1232  Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1233  Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1234 
1235  numClonedBlock += Num;
1236  Num += serialPatternMatch(*HeadMBB->succ_begin());
1237  Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1238  Num += ifPatternMatch(HeadMBB);
1239  assert(Num > 0);
1240 
1241  break;
1242  }
1243  DEBUG(
1244  dbgs() << " not working\n";
1245  );
1246  DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1247  } // walk down the postDomTree
1248 
1249  return Num;
1250 }
1251 
1252 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1253  MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1254  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1255  dbgs() << "head = BB" << HeadMBB->getNumber()
1256  << " size = " << HeadMBB->size();
1257  if (Detail) {
1258  dbgs() << "\n";
1259  HeadMBB->print(dbgs());
1260  dbgs() << "\n";
1261  }
1262 
1263  if (TrueMBB) {
1264  dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1265  << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1266  if (Detail) {
1267  dbgs() << "\n";
1268  TrueMBB->print(dbgs());
1269  dbgs() << "\n";
1270  }
1271  }
1272  if (FalseMBB) {
1273  dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1274  << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1275  if (Detail) {
1276  dbgs() << "\n";
1277  FalseMBB->print(dbgs());
1278  dbgs() << "\n";
1279  }
1280  }
1281  if (LandMBB) {
1282  dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1283  << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1284  if (Detail) {
1285  dbgs() << "\n";
1286  LandMBB->print(dbgs());
1287  dbgs() << "\n";
1288  }
1289  }
1290 
1291  dbgs() << "\n";
1292 }
1293 
1294 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1295  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1296  MachineBasicBlock **LandMBBPtr) {
1297  bool MigrateTrue = false;
1298  bool MigrateFalse = false;
1299 
1300  MachineBasicBlock *LandBlk = *LandMBBPtr;
1301 
1302  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1303  && (!FalseMBB || FalseMBB->succ_size() <= 1));
1304 
1305  if (TrueMBB == FalseMBB)
1306  return 0;
1307 
1308  MigrateTrue = needMigrateBlock(TrueMBB);
1309  MigrateFalse = needMigrateBlock(FalseMBB);
1310 
1311  if (!MigrateTrue && !MigrateFalse)
1312  return 0;
1313 
1314  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1315  // have more than one predecessors. without doing this, its predecessor
1316  // rather than headBlk will have undefined value in initReg.
1317  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1318  MigrateTrue = true;
1319  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1320  MigrateFalse = true;
1321 
1322  DEBUG(
1323  dbgs() << "before improveSimpleJumpintoIf: ";
1324  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1325  );
1326 
1327  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1328  //
1329  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1330  // {initReg = 0; org falseBlk branch }
1331  // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1332  // => org landBlk
1333  // if landBlk->pred_size() > 2, put the about if-else inside
1334  // if (initReg !=2) {...}
1335  //
1336  // add initReg = initVal to headBlk
1337 
1338  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1339  if (!MigrateTrue || !MigrateFalse) {
1340  // XXX: We have an opportunity here to optimize the "branch into if" case
1341  // here. Branch into if looks like this:
1342  // entry
1343  // / |
1344  // diamond_head branch_from
1345  // / \ |
1346  // diamond_false diamond_true
1347  // \ /
1348  // done
1349  //
1350  // The diamond_head block begins the "if" and the diamond_true block
1351  // is the block being "branched into".
1352  //
1353  // If MigrateTrue is true, then TrueBB is the block being "branched into"
1354  // and if MigrateFalse is true, then FalseBB is the block being
1355  // "branched into"
1356  //
1357  // Here is the pseudo code for how I think the optimization should work:
1358  // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1359  // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1360  // 3. Move the branch instruction from diamond_head into its own basic
1361  // block (new_block).
1362  // 4. Add an unconditional branch from diamond_head to new_block
1363  // 5. Replace the branch instruction in branch_from with an unconditional
1364  // branch to new_block. If branch_from has multiple predecessors, then
1365  // we need to replace the True/False block in the branch
1366  // instruction instead of replacing it.
1367  // 6. Change the condition of the branch instruction in new_block from
1368  // COND to (COND || GPR0)
1369  //
1370  // In order insert these MOV instruction, we will need to use the
1371  // RegisterScavenger. Usually liveness stops being tracked during
1372  // the late machine optimization passes, however if we implement
1373  // bool TargetRegisterInfo::requiresRegisterScavenging(
1374  // const MachineFunction &MF)
1375  // and have it return true, liveness will be tracked correctly
1376  // by generic optimization passes. We will also need to make sure that
1377  // all of our target-specific passes that run after regalloc and before
1378  // the CFGStructurizer track liveness and we will need to modify this pass
1379  // to correctly track liveness.
1380  //
1381  // After the above changes, the new CFG should look like this:
1382  // entry
1383  // / |
1384  // diamond_head branch_from
1385  // \ /
1386  // new_block
1387  // / |
1388  // diamond_false diamond_true
1389  // \ /
1390  // done
1391  //
1392  // Without this optimization, we are forced to duplicate the diamond_true
1393  // block and we will end up with a CFG like this:
1394  //
1395  // entry
1396  // / |
1397  // diamond_head branch_from
1398  // / \ |
1399  // diamond_false diamond_true diamond_true (duplicate)
1400  // \ / |
1401  // done --------------------|
1402  //
1403  // Duplicating diamond_true can be very costly especially if it has a
1404  // lot of instructions.
1405  return 0;
1406  }
1407 
1408  int NumNewBlk = 0;
1409 
1410  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1411 
1412  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1413  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
1414 
1415  if (LandBlkHasOtherPred) {
1416  llvm_unreachable("Extra register needed to handle CFG");
1417  unsigned CmpResReg =
1418  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1419  llvm_unreachable("Extra compare instruction needed to handle CFG");
1420  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1421  CmpResReg, DebugLoc());
1422  }
1423 
1424  // XXX: We are running this after RA, so creating virtual registers will
1425  // cause an assertion failure in the PostRA scheduling pass.
1426  unsigned InitReg =
1427  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1428  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1429  DebugLoc());
1430 
1431  if (MigrateTrue) {
1432  migrateInstruction(TrueMBB, LandBlk, I);
1433  // need to uncondionally insert the assignment to ensure a path from its
1434  // predecessor rather than headBlk has valid value in initReg if
1435  // (initVal != 1).
1436  llvm_unreachable("Extra register needed to handle CFG");
1437  }
1438  insertInstrBefore(I, AMDGPU::ELSE);
1439 
1440  if (MigrateFalse) {
1441  migrateInstruction(FalseMBB, LandBlk, I);
1442  // need to uncondionally insert the assignment to ensure a path from its
1443  // predecessor rather than headBlk has valid value in initReg if
1444  // (initVal != 0)
1445  llvm_unreachable("Extra register needed to handle CFG");
1446  }
1447 
1448  if (LandBlkHasOtherPred) {
1449  // add endif
1450  insertInstrBefore(I, AMDGPU::ENDIF);
1451 
1452  // put initReg = 2 to other predecessors of landBlk
1453  for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1454  PE = LandBlk->pred_end(); PI != PE; ++PI) {
1455  MachineBasicBlock *MBB = *PI;
1456  if (MBB != TrueMBB && MBB != FalseMBB)
1457  llvm_unreachable("Extra register needed to handle CFG");
1458  }
1459  }
1460  DEBUG(
1461  dbgs() << "result from improveSimpleJumpintoIf: ";
1462  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1463  );
1464 
1465  // update landBlk
1466  *LandMBBPtr = LandBlk;
1467 
1468  return NumNewBlk;
1469 }
1470 
1471 void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
1472  MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
1473  MachineLoop *ContLoop) {
1474  DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
1475  << " header = BB" << ContMBB->getNumber() << "\n";
1476  dbgs() << "Trying to continue loop-depth = "
1477  << getLoopDepth(ContLoop)
1478  << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
1479  settleLoopcontBlock(ContingMBB, ContMBB);
1480 }
1481 
1482 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1483  MachineBasicBlock *SrcMBB) {
1484  DEBUG(
1485  dbgs() << "serialPattern BB" << DstMBB->getNumber()
1486  << " <= BB" << SrcMBB->getNumber() << "\n";
1487  );
1488  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1489 
1490  DstMBB->removeSuccessor(SrcMBB);
1491  cloneSuccessorList(DstMBB, SrcMBB);
1492 
1493  removeSuccessor(SrcMBB);
1494  MLI->removeBlock(SrcMBB);
1495  retireBlock(SrcMBB);
1496 }
1497 
1498 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1499  MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1500  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1501  assert (TrueMBB);
1502  DEBUG(
1503  dbgs() << "ifPattern BB" << MBB->getNumber();
1504  dbgs() << "{ ";
1505  if (TrueMBB) {
1506  dbgs() << "BB" << TrueMBB->getNumber();
1507  }
1508  dbgs() << " } else ";
1509  dbgs() << "{ ";
1510  if (FalseMBB) {
1511  dbgs() << "BB" << FalseMBB->getNumber();
1512  }
1513  dbgs() << " }\n ";
1514  dbgs() << "landBlock: ";
1515  if (!LandMBB) {
1516  dbgs() << "NULL";
1517  } else {
1518  dbgs() << "BB" << LandMBB->getNumber();
1519  }
1520  dbgs() << "\n";
1521  );
1522 
1523  int OldOpcode = BranchMI->getOpcode();
1524  DebugLoc BranchDL = BranchMI->getDebugLoc();
1525 
1526 // transform to
1527 // if cond
1528 // trueBlk
1529 // else
1530 // falseBlk
1531 // endif
1532 // landBlk
1533 
1534  MachineBasicBlock::iterator I = BranchMI;
1535  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1536  BranchDL);
1537 
1538  if (TrueMBB) {
1539  MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1540  MBB->removeSuccessor(TrueMBB);
1541  if (LandMBB && TrueMBB->succ_size()!=0)
1542  TrueMBB->removeSuccessor(LandMBB);
1543  retireBlock(TrueMBB);
1544  MLI->removeBlock(TrueMBB);
1545  }
1546 
1547  if (FalseMBB) {
1548  insertInstrBefore(I, AMDGPU::ELSE);
1549  MBB->splice(I, FalseMBB, FalseMBB->begin(),
1550  FalseMBB->end());
1551  MBB->removeSuccessor(FalseMBB);
1552  if (LandMBB && FalseMBB->succ_size() != 0)
1553  FalseMBB->removeSuccessor(LandMBB);
1554  retireBlock(FalseMBB);
1555  MLI->removeBlock(FalseMBB);
1556  }
1557  insertInstrBefore(I, AMDGPU::ENDIF);
1558 
1559  BranchMI->eraseFromParent();
1560 
1561  if (LandMBB && TrueMBB && FalseMBB)
1562  MBB->addSuccessor(LandMBB);
1563 
1564 }
1565 
1566 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1567  MachineBasicBlock *LandMBB) {
1568  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1569  << " land = BB" << LandMBB->getNumber() << "\n";);
1570 
1571  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
1572  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
1573  DstBlk->addSuccessor(LandMBB);
1574  DstBlk->removeSuccessor(DstBlk);
1575 }
1576 
1577 
1578 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1579  MachineBasicBlock *LandMBB) {
1580  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
1581  << " land = BB" << LandMBB->getNumber() << "\n";);
1582  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1583  assert(BranchMI && isCondBranch(BranchMI));
1584  DebugLoc DL = BranchMI->getDebugLoc();
1585  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1586  MachineBasicBlock::iterator I = BranchMI;
1587  if (TrueBranch != LandMBB)
1588  reversePredicateSetter(I);
1589  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1590  insertInstrBefore(I, AMDGPU::BREAK);
1591  insertInstrBefore(I, AMDGPU::ENDIF);
1592  //now branchInst can be erase safely
1593  BranchMI->eraseFromParent();
1594  //now take care of successors, retire blocks
1595  ExitingMBB->removeSuccessor(LandMBB);
1596 }
1597 
1598 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1599  MachineBasicBlock *ContMBB) {
1600  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1601  << ContingMBB->getNumber()
1602  << ", cont = BB" << ContMBB->getNumber() << "\n";);
1603 
1604  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1605  if (MI) {
1606  assert(isCondBranch(MI));
1608  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1609  int OldOpcode = MI->getOpcode();
1610  DebugLoc DL = MI->getDebugLoc();
1611 
1612  bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1613 
1614  if (!UseContinueLogical) {
1615  int BranchOpcode =
1616  TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1617  getBranchZeroOpcode(OldOpcode);
1618  insertCondBranchBefore(I, BranchOpcode, DL);
1619  // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1620  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1621  insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1622  } else {
1623  int BranchOpcode =
1624  TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1625  getContinueZeroOpcode(OldOpcode);
1626  insertCondBranchBefore(I, BranchOpcode, DL);
1627  }
1628 
1629  MI->eraseFromParent();
1630  } else {
1631  // if we've arrived here then we've already erased the branch instruction
1632  // travel back up the basic block to see the last reference of our debug
1633  // location we've just inserted that reference here so it should be
1634  // representative insertEnd to ensure phi-moves, if exist, go before the
1635  // continue-instr.
1636  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1637  getLastDebugLocInBB(ContingMBB));
1638  }
1639 }
1640 
1641 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1642  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1643  int Cloned = 0;
1644  assert(PreMBB->isSuccessor(SrcMBB));
1645  while (SrcMBB && SrcMBB != DstMBB) {
1646  assert(SrcMBB->succ_size() == 1);
1647  if (SrcMBB->pred_size() > 1) {
1648  SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1649  ++Cloned;
1650  }
1651 
1652  PreMBB = SrcMBB;
1653  SrcMBB = *SrcMBB->succ_begin();
1654  }
1655 
1656  return Cloned;
1657 }
1658 
1660 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1661  MachineBasicBlock *PredMBB) {
1662  assert(PredMBB->isSuccessor(MBB) &&
1663  "succBlk is not a prececessor of curBlk");
1664 
1665  MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
1666  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1667  //srcBlk, oldBlk, newBlk
1668 
1669  PredMBB->removeSuccessor(MBB);
1670  PredMBB->addSuccessor(CloneMBB);
1671 
1672  // add all successor to cloneBlk
1673  cloneSuccessorList(CloneMBB, MBB);
1674 
1675  numClonedInstr += MBB->size();
1676 
1677  DEBUG(
1678  dbgs() << "Cloned block: " << "BB"
1679  << MBB->getNumber() << "size " << MBB->size() << "\n";
1680  );
1681 
1682  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1683 
1684  return CloneMBB;
1685 }
1686 
1687 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1689  MachineBasicBlock::iterator SpliceEnd;
1690  //look for the input branchinstr, not the AMDGPU branchinstr
1691  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1692  if (!BranchMI) {
1693  DEBUG(
1694  dbgs() << "migrateInstruction don't see branch instr\n" ;
1695  );
1696  SpliceEnd = SrcMBB->end();
1697  } else {
1698  DEBUG(
1699  dbgs() << "migrateInstruction see branch instr\n" ;
1700  BranchMI->dump();
1701  );
1702  SpliceEnd = BranchMI;
1703  }
1704  DEBUG(
1705  dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
1706  << "srcSize = " << SrcMBB->size() << "\n";
1707  );
1708 
1709  //splice insert before insertPos
1710  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1711 
1712  DEBUG(
1713  dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
1714  << "srcSize = " << SrcMBB->size() << "\n";
1715  );
1716 }
1717 
1719 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1720  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1721  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1722 
1723  if (!LoopHeader || !LoopLatch)
1724  return nullptr;
1725  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1726  // Is LoopRep an infinite loop ?
1727  if (!BranchMI || !isUncondBranch(BranchMI))
1728  return nullptr;
1729 
1730  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1731  FuncRep->push_back(DummyExitBlk); //insert to function
1732  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1733  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1734  LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
1735  Ctx.emitError("Extra register needed to handle CFG");
1736  return nullptr;
1737 }
1738 
1739 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1740  MachineInstr *BranchMI;
1741 
1742  // I saw two unconditional branch in one basic block in example
1743  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1744  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1745  && isUncondBranch(BranchMI)) {
1746  DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
1747  BranchMI->eraseFromParent();
1748  }
1749 }
1750 
1751 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1752  MachineBasicBlock *MBB) {
1753  if (MBB->succ_size() != 2)
1754  return;
1755  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1756  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1757  if (MBB1 != MBB2)
1758  return;
1759 
1760  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1761  assert(BranchMI && isCondBranch(BranchMI));
1762  DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
1763  BranchMI->eraseFromParent();
1764  SHOWNEWBLK(MBB1, "Removing redundant successor");
1765  MBB->removeSuccessor(MBB1);
1766 }
1767 
1768 void AMDGPUCFGStructurizer::addDummyExitBlock(
1770  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1771  FuncRep->push_back(DummyExitBlk); //insert to function
1772  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
1773 
1775  E = RetMBB.end(); It != E; ++It) {
1776  MachineBasicBlock *MBB = *It;
1777  MachineInstr *MI = getReturnInstr(MBB);
1778  if (MI)
1779  MI->eraseFromParent();
1780  MBB->addSuccessor(DummyExitBlk);
1781  DEBUG(
1782  dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1783  << " successors\n";
1784  );
1785  }
1786  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1787 }
1788 
1789 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1790  while (MBB->succ_size())
1791  MBB->removeSuccessor(*MBB->succ_begin());
1792 }
1793 
1794 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1795  int SccNum) {
1796  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1797  if (!srcBlkInfo)
1798  srcBlkInfo = new BlockInformation();
1799  srcBlkInfo->SccNum = SccNum;
1800 }
1801 
1802 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1803  DEBUG(
1804  dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
1805  );
1806 
1807  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1808 
1809  if (!SrcBlkInfo)
1810  SrcBlkInfo = new BlockInformation();
1811 
1812  SrcBlkInfo->IsRetired = true;
1813  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1814  && "can't retire block yet");
1815 }
1816 
1817 void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
1818  MachineBasicBlock *MBB) {
1819  MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
1820  if (!MBB) {
1821  MBB = FuncRep->CreateMachineBasicBlock();
1822  FuncRep->push_back(MBB); //insert to function
1823  SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
1824  }
1825  TheEntry = MBB;
1826  DEBUG(
1827  dbgs() << "setLoopLandBlock loop-header = BB"
1828  << loopRep->getHeader()->getNumber()
1829  << " landing-block = BB" << MBB->getNumber() << "\n";
1830  );
1831 }
1832 
1834 AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
1835  MachineBasicBlock *MBB2) {
1836 
1837  if (PDT->dominates(MBB1, MBB2))
1838  return MBB1;
1839  if (PDT->dominates(MBB2, MBB1))
1840  return MBB2;
1841 
1842  MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
1843  MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
1844 
1845  // Handle newly cloned node.
1846  if (!Node1 && MBB1->succ_size() == 1)
1847  return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
1848  if (!Node2 && MBB2->succ_size() == 1)
1849  return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
1850 
1851  if (!Node1 || !Node2)
1852  return nullptr;
1853 
1854  Node1 = Node1->getIDom();
1855  while (Node1) {
1856  if (PDT->dominates(Node1, Node2))
1857  return Node1->getBlock();
1858  Node1 = Node1->getIDom();
1859  }
1860 
1861  return nullptr;
1862 }
1863 
1865 AMDGPUCFGStructurizer::findNearestCommonPostDom(
1866  std::set<MachineBasicBlock *> &MBBs) {
1867  MachineBasicBlock *CommonDom;
1868  std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
1869  std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
1870  for (CommonDom = *It; It != E && CommonDom; ++It) {
1871  MachineBasicBlock *MBB = *It;
1872  if (MBB != CommonDom)
1873  CommonDom = findNearestCommonPostDom(MBB, CommonDom);
1874  }
1875 
1876  DEBUG(
1877  dbgs() << "Common post dominator for exit blocks is ";
1878  if (CommonDom)
1879  dbgs() << "BB" << CommonDom->getNumber() << "\n";
1880  else
1881  dbgs() << "NULL\n";
1882  );
1883 
1884  return CommonDom;
1885 }
1886 
1887 char AMDGPUCFGStructurizer::ID = 0;
1888 
1889 } // end anonymous namespace
1890 
1891 
1892 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1893  "AMDGPU CFG Structurizer", false, false)
1897 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1898  "AMDGPU CFG Structurizer", false, false)
1899 
1901  return new AMDGPUCFGStructurizer();
1902 }
unsigned succ_size() const
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
AMDGPU specific subclass of TargetSubtarget.
STATISTIC(NumFunctions,"Total number of functions")
Interface definition for R600InstrInfo.
MachineBasicBlock * getMBB() const
int getNumber() const
getNumber - MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a M...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: GraphTraits.h:27
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
AMDGPU CFG Structurizer
#define OPCODE_IS_ZERO
LoopT * getParentLoop() const
Definition: LoopInfo.h:97
#define OPCODE_IS_NOT_ZERO_INT
A debug info location.
Definition: DebugLoc.h:34
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
FunctionPass * createAMDGPUCFGStructurizerPass()
BlockT * getHeader() const
Definition: LoopInfo.h:96
MachineFunctionAnalysis - This class is a Pass that manages a MachineFunction object.
BlockT * getLoopLatch() const
getLoopLatch - If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:156
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define SHOWNEWINSTR(i)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
void initializeAMDGPUCFGStructurizerPass(PassRegistry &)
iterator begin() const
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
getExitingBlocks - Return all blocks inside the loop that have successors outside of the loop...
Definition: LoopInfoImpl.h:35
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer,"amdgpustructurizer","AMDGPU CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUCFGStructurizer
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
getExitBlocks - Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:64
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:224
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
const HexagonRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
#define OPCODE_IS_NOT_ZERO
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Base class for the actual dominator tree node.
std::vector< MachineBasicBlock * >::iterator pred_iterator
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_iterator rend()
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
bundle_iterator< MachineInstr, instr_iterator > iterator
#define INVALIDSCCNUM
size_type size() const
Definition: SmallPtrSet.h:79
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
#define SHOWNEWBLK(b, msg)
AMDGPU CFG false
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
contains - Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:105
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
bool isEmpty() const
isEmpty - Return true if there are no jump tables.
void removeSuccessor(MachineBasicBlock *succ)
removeSuccessor - Remove successor from the successors list of this MachineBasicBlock.
DomTreeNodeBase< NodeT > * getIDom() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
CloneMachineInstr - Create a new MachineInstr which is a copy of the 'Orig' instruction, identical in all ways except the instruction has no parent, prev, or next.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
bool isSuccessor(const MachineBasicBlock *MBB) const
isSuccessor - Return true if the specified MBB is a successor of this block.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
void dump() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
NodeT * getBlock() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Representation of each machine instruction.
Definition: MachineInstr.h:51
Flatten the CFG
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:54
iterator_range< df_iterator< T > > depth_first(const T &G)
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
std::reverse_iterator< iterator > reverse_iterator
void push_back(MachineBasicBlock *MBB)
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:92
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:41
iterator end() const
std::vector< LoopT * >::const_iterator iterator
Definition: LoopInfo.h:128
void print(raw_ostream &OS, SlotIndexes *=nullptr) const
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
iterator/begin/end - The interface to the top-level loops in the current function.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
unsigned getLoopDepth() const
getLoopDepth - Return the nesting level of this loop.
Definition: LoopInfo.h:89
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
addSuccessor - Add succ as a successor of this MachineBasicBlock.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:40
#define OPCODE_IS_ZERO_INT