LLVM  4.0.0
AMDILCFGStructurizer.cpp
Go to the documentation of this file.
1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //==-----------------------------------------------------------------------===//
10 
11 #include "AMDGPU.h"
12 #include "AMDGPUInstrInfo.h"
13 #include "AMDGPUSubtarget.h"
14 #include "R600InstrInfo.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/Support/Debug.h"
32 #include <deque>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "structcfg"
37 
38 #define DEFAULT_VEC_SLOTS 8
39 
40 // TODO: move-begin.
41 
42 //===----------------------------------------------------------------------===//
43 //
44 // Statistics for CFGStructurizer.
45 //
46 //===----------------------------------------------------------------------===//
47 
48 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
49  "matched");
50 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
51  "matched");
52 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
53 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
54 
55 namespace llvm {
57 }
58 
59 //===----------------------------------------------------------------------===//
60 //
61 // Miscellaneous utility for CFGStructurizer.
62 //
63 //===----------------------------------------------------------------------===//
64 namespace {
65 #define SHOWNEWINSTR(i) \
66  DEBUG(dbgs() << "New instr: " << *i << "\n");
67 
68 #define SHOWNEWBLK(b, msg) \
69 DEBUG( \
70  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
71  dbgs() << "\n"; \
72 );
73 
74 #define SHOWBLK_DETAIL(b, msg) \
75 DEBUG( \
76  if (b) { \
77  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
78  b->print(dbgs()); \
79  dbgs() << "\n"; \
80  } \
81 );
82 
83 #define INVALIDSCCNUM -1
84 
85 template<class NodeT>
86 void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
87  size_t sz = Src.size();
88  for (size_t i = 0; i < sz/2; ++i) {
89  NodeT *t = Src[i];
90  Src[i] = Src[sz - i - 1];
91  Src[sz - i - 1] = t;
92  }
93 }
94 
95 } // end anonymous namespace
96 
97 //===----------------------------------------------------------------------===//
98 //
99 // supporting data structure for CFGStructurizer
100 //
101 //===----------------------------------------------------------------------===//
102 
103 
104 namespace {
105 
/// Per-MachineBasicBlock bookkeeping used by the structurizer.
class BlockInformation {
public:
  // True once the block has been merged away; later sweeps skip it and the
  // final cleanup in run() erases it from the function.
  bool IsRetired;
  // Strongly-connected-component number recorded by orderBlocks(), or
  // INVALIDSCCNUM when never recorded (e.g. the block is unreachable).
  int SccNum;
  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
};
112 
113 } // end anonymous namespace
114 
115 //===----------------------------------------------------------------------===//
116 //
117 // CFGStructurizer
118 //
119 //===----------------------------------------------------------------------===//
120 
121 namespace {
122 class AMDGPUCFGStructurizer : public MachineFunctionPass {
123 public:
125  typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
126  typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
127 
128  enum PathToKind {
129  Not_SinglePath = 0,
130  SinglePath_InPath = 1,
131  SinglePath_NotInPath = 2
132  };
133 
134  static char ID;
135 
136  AMDGPUCFGStructurizer() :
137  MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
139  }
140 
141  StringRef getPassName() const override {
142  return "AMDGPU Control Flow Graph structurizer Pass";
143  }
144 
145  void getAnalysisUsage(AnalysisUsage &AU) const override {
150  }
151 
152  /// Perform the CFG structurization
153  bool run();
154 
155  /// Perform the CFG preparation
156  /// This step will remove every unconditionnal/dead jump instructions and make
157  /// sure all loops have an exit block
158  bool prepare();
159 
160  bool runOnMachineFunction(MachineFunction &MF) override {
161  TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
162  TRI = &TII->getRegisterInfo();
163  DEBUG(MF.dump(););
164  OrderedBlks.clear();
165  Visited.clear();
166  FuncRep = &MF;
167  MLI = &getAnalysis<MachineLoopInfo>();
168  DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
169  MDT = &getAnalysis<MachineDominatorTree>();
170  DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
171  PDT = &getAnalysis<MachinePostDominatorTree>();
172  DEBUG(PDT->print(dbgs()););
173  prepare();
174  run();
175  DEBUG(MF.dump(););
176  return true;
177  }
178 
179 protected:
182  MachineLoopInfo *MLI;
183  const R600InstrInfo *TII;
184  const R600RegisterInfo *TRI;
185 
186  // PRINT FUNCTIONS
187  /// Print the ordered Blocks.
188  void printOrderedBlocks() const {
189  size_t i = 0;
190  for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
191  iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
192  dbgs() << "BB" << (*iterBlk)->getNumber();
193  dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
194  if (i != 0 && i % 10 == 0) {
195  dbgs() << "\n";
196  } else {
197  dbgs() << " ";
198  }
199  }
200  }
201  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
202  for (MachineLoop::iterator iter = LoopInfo.begin(),
203  iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
204  (*iter)->print(dbgs(), 0);
205  }
206  }
207 
208  // UTILITY FUNCTIONS
209  int getSCCNum(MachineBasicBlock *MBB) const;
210  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
211  bool hasBackEdge(MachineBasicBlock *MBB) const;
212  bool isRetiredBlock(MachineBasicBlock *MBB) const;
213  bool isActiveLoophead(MachineBasicBlock *MBB) const;
214  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
215  bool AllowSideEntry = true) const;
216  int countActiveBlock(MBBVector::const_iterator It,
218  bool needMigrateBlock(MachineBasicBlock *MBB) const;
219 
220  // Utility Functions
221  void reversePredicateSetter(MachineBasicBlock::iterator I,
223  /// Compute the reversed DFS post order of Blocks
224  void orderBlocks(MachineFunction *MF);
225 
226  // Function originally from CFGStructTraits
227  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
228  const DebugLoc &DL = DebugLoc());
229  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
230  const DebugLoc &DL = DebugLoc());
231  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
232  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
233  const DebugLoc &DL);
234  void insertCondBranchBefore(MachineBasicBlock *MBB,
235  MachineBasicBlock::iterator I, int NewOpcode,
236  int RegNum, const DebugLoc &DL);
237  static int getBranchNzeroOpcode(int OldOpcode);
238  static int getBranchZeroOpcode(int OldOpcode);
239  static int getContinueNzeroOpcode(int OldOpcode);
240  static int getContinueZeroOpcode(int OldOpcode);
241  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
242  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
243  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
244  MachineInstr *MI);
245  static bool isCondBranch(MachineInstr *MI);
246  static bool isUncondBranch(MachineInstr *MI);
247  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
248  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
249  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
250  ///
251  /// BB with backward-edge could have move instructions after the branch
252  /// instruction. Such move instruction "belong to" the loop backward-edge.
253  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
254  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
255  static bool isReturnBlock(MachineBasicBlock *MBB);
256  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
257  MachineBasicBlock *SrcMBB) ;
258  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
259  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
260  /// because the AMDGPU instruction is not recognized as terminator fix this
261  /// and retire this routine
262  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
263  MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
264  static void wrapup(MachineBasicBlock *MBB);
265 
266 
267  int patternMatch(MachineBasicBlock *MBB);
268  int patternMatchGroup(MachineBasicBlock *MBB);
269  int serialPatternMatch(MachineBasicBlock *MBB);
270  int ifPatternMatch(MachineBasicBlock *MBB);
271  int loopendPatternMatch();
272  int mergeLoop(MachineLoop *LoopRep);
273 
274  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
275  /// the same loop with LoopLandInfo without explicitly keeping track of
276  /// loopContBlks and loopBreakBlks, this is a method to get the information.
277  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
278  MachineBasicBlock *Src2MBB);
279  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
280  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
281  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
282  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
283  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
284  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
285  MachineBasicBlock **LandMBBPtr);
286  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
287  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
288  MachineBasicBlock *LandMBB, bool Detail = false);
289  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
290  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
291  void mergeSerialBlock(MachineBasicBlock *DstMBB,
292  MachineBasicBlock *SrcMBB);
293 
294  void mergeIfthenelseBlock(MachineInstr *BranchMI,
295  MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
296  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
297  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
298  MachineBasicBlock *LandMBB);
299  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
300  MachineBasicBlock *LandMBB);
301  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
302  MachineBasicBlock *ContMBB);
303  /// normalizeInfiniteLoopExit change
304  /// B1:
305  /// uncond_br LoopHeader
306  ///
307  /// to
308  /// B1:
309  /// cond_br 1 LoopHeader dummyExit
310  /// and return the newly added dummy exit block
311  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
312  void removeUnconditionalBranch(MachineBasicBlock *MBB);
313  /// Remove duplicate branches instructions in a block.
314  /// For instance
315  /// B0:
316  /// cond_br X B1 B2
317  /// cond_br X B1 B2
318  /// is transformed to
319  /// B0:
320  /// cond_br X B1 B2
321  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
322  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
323  void removeSuccessor(MachineBasicBlock *MBB);
324  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
325  MachineBasicBlock *PredMBB);
326  void migrateInstruction(MachineBasicBlock *SrcMBB,
328  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
329  void retireBlock(MachineBasicBlock *MBB);
330 
331 
332 private:
333  MBBInfoMap BlockInfoMap;
334  LoopLandInfoMap LLInfoMap;
335  std::map<MachineLoop *, bool> Visited;
336  MachineFunction *FuncRep;
338 };
339 
340 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
341  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
342  if (It == BlockInfoMap.end())
343  return INVALIDSCCNUM;
344  return (*It).second->SccNum;
345 }
346 
347 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
348  const {
349  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
350  if (It == LLInfoMap.end())
351  return nullptr;
352  return (*It).second;
353 }
354 
355 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
356  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
357  if (!LoopRep)
358  return false;
359  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
360  return MBB->isSuccessor(LoopHeader);
361 }
362 
363 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
364  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
365  if (It == BlockInfoMap.end())
366  return false;
367  return (*It).second->IsRetired;
368 }
369 
370 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
371  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
372  while (LoopRep && LoopRep->getHeader() == MBB) {
373  MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
374  if(!LoopLand)
375  return true;
376  if (!isRetiredBlock(LoopLand))
377  return true;
378  LoopRep = LoopRep->getParentLoop();
379  }
380  return false;
381 }
382 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
383  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
384  bool AllowSideEntry) const {
385  assert(DstMBB);
386  if (SrcMBB == DstMBB)
387  return SinglePath_InPath;
388  while (SrcMBB && SrcMBB->succ_size() == 1) {
389  SrcMBB = *SrcMBB->succ_begin();
390  if (SrcMBB == DstMBB)
391  return SinglePath_InPath;
392  if (!AllowSideEntry && SrcMBB->pred_size() > 1)
393  return Not_SinglePath;
394  }
395  if (SrcMBB && SrcMBB->succ_size()==0)
396  return SinglePath_NotInPath;
397  return Not_SinglePath;
398 }
399 
400 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
402  int Count = 0;
403  while (It != E) {
404  if (!isRetiredBlock(*It))
405  ++Count;
406  ++It;
407  }
408  return Count;
409 }
410 
411 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
412  unsigned BlockSizeThreshold = 30;
413  unsigned CloneInstrThreshold = 100;
414  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
415 
416  if(!MultiplePreds)
417  return false;
418  unsigned BlkSize = MBB->size();
419  return ((BlkSize > BlockSizeThreshold) &&
420  (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
421 }
422 
423 void AMDGPUCFGStructurizer::reversePredicateSetter(
425  assert(I.isValid() && "Expected valid iterator");
426  for (;; --I) {
427  if (I == MBB.end())
428  continue;
429  if (I->getOpcode() == AMDGPU::PRED_X) {
430  switch (I->getOperand(2).getImm()) {
431  case AMDGPU::PRED_SETE_INT:
432  I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
433  return;
434  case AMDGPU::PRED_SETNE_INT:
435  I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
436  return;
437  case AMDGPU::PRED_SETE:
438  I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
439  return;
440  case AMDGPU::PRED_SETNE:
441  I->getOperand(2).setImm(AMDGPU::PRED_SETE);
442  return;
443  default:
444  llvm_unreachable("PRED_X Opcode invalid!");
445  }
446  }
447  }
448 }
449 
450 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
451  int NewOpcode, const DebugLoc &DL) {
452  MachineInstr *MI =
453  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
454  MBB->push_back(MI);
455  //assume the instruction doesn't take any reg operand ...
456  SHOWNEWINSTR(MI);
457 }
458 
459 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
460  int NewOpcode,
461  const DebugLoc &DL) {
462  MachineInstr *MI =
463  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
464  if (MBB->begin() != MBB->end())
465  MBB->insert(MBB->begin(), MI);
466  else
467  MBB->push_back(MI);
468  SHOWNEWINSTR(MI);
469  return MI;
470 }
471 
472 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
473  MachineBasicBlock::iterator I, int NewOpcode) {
474  MachineInstr *OldMI = &(*I);
475  MachineBasicBlock *MBB = OldMI->getParent();
476  MachineInstr *NewMBB =
477  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
478  MBB->insert(I, NewMBB);
479  //assume the instruction doesn't take any reg operand ...
480  SHOWNEWINSTR(NewMBB);
481  return NewMBB;
482 }
483 
484 void AMDGPUCFGStructurizer::insertCondBranchBefore(
485  MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
486  MachineInstr *OldMI = &(*I);
487  MachineBasicBlock *MBB = OldMI->getParent();
488  MachineFunction *MF = MBB->getParent();
489  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
490  MBB->insert(I, NewMI);
491  MachineInstrBuilder MIB(*MF, NewMI);
492  MIB.addReg(OldMI->getOperand(1).getReg(), false);
493  SHOWNEWINSTR(NewMI);
494  //erase later oldInstr->eraseFromParent();
495 }
496 
497 void AMDGPUCFGStructurizer::insertCondBranchBefore(
499  int RegNum, const DebugLoc &DL) {
500  MachineFunction *MF = blk->getParent();
501  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
502  //insert before
503  blk->insert(I, NewInstr);
504  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
505  SHOWNEWINSTR(NewInstr);
506 }
507 
508 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
509  switch(OldOpcode) {
510  case AMDGPU::JUMP_COND:
511  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
512  case AMDGPU::BRANCH_COND_i32:
513  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
514  default: llvm_unreachable("internal error");
515  }
516  return -1;
517 }
518 
519 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
520  switch(OldOpcode) {
521  case AMDGPU::JUMP_COND:
522  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
523  case AMDGPU::BRANCH_COND_i32:
524  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
525  default: llvm_unreachable("internal error");
526  }
527  return -1;
528 }
529 
530 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
531  switch(OldOpcode) {
532  case AMDGPU::JUMP_COND:
533  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
534  default: llvm_unreachable("internal error");
535  };
536  return -1;
537 }
538 
539 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
540  switch(OldOpcode) {
541  case AMDGPU::JUMP_COND:
542  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
543  default: llvm_unreachable("internal error");
544  }
545  return -1;
546 }
547 
/// The taken (true) target of an AMDGPU branch is machine operand 0.
MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
  return MI->getOperand(0).getMBB();
}
551 
/// Redirect the taken (true) target of \p MI — machine operand 0 — to \p MBB.
void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
    MachineBasicBlock *MBB) {
  MI->getOperand(0).setMBB(MBB);
}
556 
558 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
559  MachineInstr *MI) {
560  assert(MBB->succ_size() == 2);
561  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
564  ++Next;
565  return (*It == TrueBranch) ? *Next : *It;
566 }
567 
568 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
569  switch (MI->getOpcode()) {
570  case AMDGPU::JUMP_COND:
571  case AMDGPU::BRANCH_COND_i32:
572  case AMDGPU::BRANCH_COND_f32: return true;
573  default:
574  return false;
575  }
576  return false;
577 }
578 
579 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
580  switch (MI->getOpcode()) {
581  case AMDGPU::JUMP:
582  case AMDGPU::BRANCH:
583  return true;
584  default:
585  return false;
586  }
587  return false;
588 }
589 
590 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
591  //get DebugLoc from the first MachineBasicBlock instruction with debug info
592  DebugLoc DL;
593  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
594  ++It) {
595  MachineInstr *instr = &(*It);
596  if (instr->getDebugLoc())
597  DL = instr->getDebugLoc();
598  }
599  return DL;
600 }
601 
602 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
603  MachineBasicBlock *MBB) {
605  MachineInstr *MI = &*It;
606  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
607  return MI;
608  return nullptr;
609 }
610 
611 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
612  MachineBasicBlock *MBB) {
613  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
614  It != E; ++It) {
615  // FIXME: Simplify
616  MachineInstr *MI = &*It;
617  if (MI) {
618  if (isCondBranch(MI) || isUncondBranch(MI))
619  return MI;
620  else if (!TII->isMov(MI->getOpcode()))
621  break;
622  }
623  }
624  return nullptr;
625 }
626 
627 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
629  if (It != MBB->rend()) {
630  MachineInstr *instr = &(*It);
631  if (instr->getOpcode() == AMDGPU::RETURN)
632  return instr;
633  }
634  return nullptr;
635 }
636 
637 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
638  MachineInstr *MI = getReturnInstr(MBB);
639  bool IsReturn = (MBB->succ_size() == 0);
640  if (MI)
641  assert(IsReturn);
642  else if (IsReturn)
643  DEBUG(
644  dbgs() << "BB" << MBB->getNumber()
645  <<" is return block without RETURN instr\n";);
646  return IsReturn;
647 }
648 
649 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
650  MachineBasicBlock *SrcMBB) {
651  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
652  iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
653  DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
654 }
655 
656 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
657  MachineFunction *Func = MBB->getParent();
658  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
659  Func->push_back(NewMBB); //insert to function
660  for (const MachineInstr &It : *MBB)
661  NewMBB->push_back(Func->CloneMachineInstr(&It));
662  return NewMBB;
663 }
664 
665 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
666  MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
667  MachineBasicBlock *NewBlk) {
668  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
669  if (BranchMI && isCondBranch(BranchMI) &&
670  getTrueBranch(BranchMI) == OldMBB)
671  setTrueBranch(BranchMI, NewBlk);
672 }
673 
674 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
675  assert((!MBB->getParent()->getJumpTableInfo()
676  || MBB->getParent()->getJumpTableInfo()->isEmpty())
677  && "found a jump table");
678 
679  //collect continue right before endloop
681  MachineBasicBlock::iterator Pre = MBB->begin();
682  MachineBasicBlock::iterator E = MBB->end();
684  while (It != E) {
685  if (Pre->getOpcode() == AMDGPU::CONTINUE
686  && It->getOpcode() == AMDGPU::ENDLOOP)
687  ContInstr.push_back(&*Pre);
688  Pre = It;
689  ++It;
690  }
691 
692  //delete continue right before endloop
693  for (unsigned i = 0; i < ContInstr.size(); ++i)
694  ContInstr[i]->eraseFromParent();
695 
696  // TODO to fix up jump table so later phase won't be confused. if
697  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
698  // there isn't such an interface yet. alternatively, replace all the other
699  // blocks in the jump table with the entryBlk //}
700 
701 }
702 
703 
704 bool AMDGPUCFGStructurizer::prepare() {
705  bool Changed = false;
706 
707  //FIXME: if not reducible flow graph, make it so ???
708 
709  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
710 
711  orderBlocks(FuncRep);
712 
714 
715  // Add an ExitBlk to loop that don't have one
716  for (MachineLoopInfo::iterator It = MLI->begin(),
717  E = MLI->end(); It != E; ++It) {
718  MachineLoop *LoopRep = (*It);
719  MBBVector ExitingMBBs;
720  LoopRep->getExitingBlocks(ExitingMBBs);
721 
722  if (ExitingMBBs.size() == 0) {
723  MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
724  if (DummyExitBlk)
725  RetBlks.push_back(DummyExitBlk);
726  }
727  }
728 
729  // Remove unconditional branch instr.
730  // Add dummy exit block iff there are multiple returns.
732  It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
733  MachineBasicBlock *MBB = *It;
734  removeUnconditionalBranch(MBB);
735  removeRedundantConditionalBranch(MBB);
736  if (isReturnBlock(MBB)) {
737  RetBlks.push_back(MBB);
738  }
739  assert(MBB->succ_size() <= 2);
740  }
741 
742  if (RetBlks.size() >= 2) {
743  addDummyExitBlock(RetBlks);
744  Changed = true;
745  }
746 
747  return Changed;
748 }
749 
/// Main structurization driver: repeatedly sweep the ordered block list,
/// applying pattern reductions SCC by SCC, until the function collapses to
/// a single block or an iteration makes no progress; then erase retired
/// blocks, release the bookkeeping maps, and report a fatal error if the
/// CFG turned out to be irreducible.
/// NOTE(review): several declaration lines were dropped by the page
/// extraction (the OrderedBlks iterators, SccBeginIter, the declaration of
/// MBB, the initializer of EntryMBB, and the trailing wrapup call); the
/// surviving continuation fragments are kept verbatim and marked below —
/// reconcile against the upstream source before building.
bool AMDGPUCFGStructurizer::run() {

  //Assume reducible CFG...
  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");

#ifdef STRESSTEST
  //Use the worse block ordering to test the algorithm.
  ReverseVector(orderedBlks);
#endif

  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
  int NumIter = 0;
  bool Finish = false;
  // NOTE(review): extraction gap — a declaration (presumably of MBB) is
  // missing here.
  bool MakeProgress = false;
  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
      OrderedBlks.end());

  do {
    ++NumIter;
    DEBUG(
      dbgs() << "numIter = " << NumIter
             << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
    );

    // NOTE(review): extraction gap — the declarations that these fragments
    // continue ("It = OrderedBlks.begin()", "E = OrderedBlks.end()",
    // "SccBeginIter = It") were lost.
    OrderedBlks.begin();
    OrderedBlks.end();

    It;
    MachineBasicBlock *SccBeginMBB = nullptr;
    int SccNumBlk = 0; // The number of active blocks, init to a
                       // maximum possible number.
    int SccNumIter; // Number of iteration in this SCC.

    while (It != E) {
      MBB = *It;

      if (!SccBeginMBB) {
        // Entering a new SCC: remember where it starts and reset counters.
        SccBeginIter = It;
        SccBeginMBB = MBB;
        SccNumIter = 0;
        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
        DEBUG(
          dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
          dbgs() << "\n";
        );
      }

      if (!isRetiredBlock(MBB))
        patternMatch(MBB);

      ++It;

      bool ContNextScc = true;
      if (It == E
          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
        // Just finish one scc.
        ++SccNumIter;
        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
          // The SCC did not shrink this pass: give up on it for now.
          DEBUG(
            dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
                   << ", sccNumIter = " << SccNumIter;
            dbgs() << "doesn't make any progress\n";
          );
          ContNextScc = true;
        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
          // The SCC shrank but is not fully reduced: re-run it.
          SccNumBlk = sccRemainedNumBlk;
          It = SccBeginIter;
          ContNextScc = false;
          DEBUG(
            dbgs() << "repeat processing SCC" << getSCCNum(MBB)
                   << "sccNumIter = " << SccNumIter << '\n';
          );
        } else {
          // Finish the current scc.
          ContNextScc = true;
        }
      } else {
        // Continue on next component in the current scc.
        ContNextScc = false;
      }

      if (ContNextScc)
        SccBeginMBB = nullptr;
    } //while, "one iteration" over the function.

    // NOTE(review): extraction gap — the initializer of EntryMBB (the
    // function entry node) was lost here.
    MachineBasicBlock *EntryMBB =
    if (EntryMBB->succ_size() == 0) {
      Finish = true;
      DEBUG(
        dbgs() << "Reduce to one block\n";
      );
    } else {
      int NewnumRemainedBlk
        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
      // consider cloned blocks ??
      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
        MakeProgress = true;
        NumRemainedBlk = NewnumRemainedBlk;
      } else {
        MakeProgress = false;
        DEBUG(
          dbgs() << "No progress\n";
        );
      }
    }
  } while (!Finish && MakeProgress);

  // Misc wrap up to maintain the consistency of the Function representation.
  // NOTE(review): extraction gap — the wrapup(...) call on the entry block
  // was lost here.

  // Detach retired Block, release memory.
  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
       It != E; ++It) {
    if ((*It).second && (*It).second->IsRetired) {
      assert(((*It).first)->getNumber() != -1);
      DEBUG(
        dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
      );
      (*It).first->eraseFromParent(); //Remove from the parent Function.
    }
    delete (*It).second;
  }
  BlockInfoMap.clear();
  LLInfoMap.clear();

  if (!Finish) {
    DEBUG(FuncRep->viewCFG());
    report_fatal_error("IRREDUCIBLE_CFG");
  }

  return true;
}
888 
889 
890 
891 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
892  int SccNum = 0;
894  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
895  ++It, ++SccNum) {
896  const std::vector<MachineBasicBlock *> &SccNext = *It;
897  for (std::vector<MachineBasicBlock *>::const_iterator
898  blockIter = SccNext.begin(), blockEnd = SccNext.end();
899  blockIter != blockEnd; ++blockIter) {
900  MBB = *blockIter;
901  OrderedBlks.push_back(MBB);
902  recordSccnum(MBB, SccNum);
903  }
904  }
905 
906  //walk through all the block in func to check for unreachable
907  typedef GraphTraits<MachineFunction *> GTM;
908  auto It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
909  for (; It != E; ++It) {
910  MachineBasicBlock *MBB = *It;
911  SccNum = getSCCNum(MBB);
912  if (SccNum == INVALIDSCCNUM)
913  dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
914  }
915 }
916 
917 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
918  int NumMatch = 0;
919  int CurMatch;
920 
921  DEBUG(
922  dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
923  );
924 
925  while ((CurMatch = patternMatchGroup(MBB)) > 0)
926  NumMatch += CurMatch;
927 
928  DEBUG(
929  dbgs() << "End patternMatch BB" << MBB->getNumber()
930  << ", numMatch = " << NumMatch << "\n";
931  );
932 
933  return NumMatch;
934 }
935 
/// Run one round of reductions on \p MBB and return the match count.
/// The call order is significant: loop ends are reduced first so that
/// serial and if reductions below see the simplified CFG.
int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  NumMatch += loopendPatternMatch();
  NumMatch += serialPatternMatch(MBB);
  NumMatch += ifPatternMatch(MBB);
  return NumMatch;
}
943 
944 
945 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
946  if (MBB->succ_size() != 1)
947  return 0;
948 
949  MachineBasicBlock *childBlk = *MBB->succ_begin();
950  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
951  return 0;
952 
953  mergeSerialBlock(MBB, childBlk);
954  ++numSerialPatternMatch;
955  return 1;
956 }
957 
/// If pattern: \p MBB ends in a conditional branch with two successors.
/// After recursively reducing both arms, classify the shape (diamond,
/// triangle with an empty arm, or detached continue/break inside a loop),
/// clone shared arm blocks if needed, and merge everything into a
/// structured if-then-else. Falls back to handleJumpintoIf for shapes with
/// side entries. \returns the number of reductions performed (0 if the
/// block does not match).
int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
  //two edges
  if (MBB->succ_size() != 2)
    return 0;
  if (hasBackEdge(MBB))
    return 0;
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  if (!BranchMI)
    return 0;

  assert(isCondBranch(BranchMI));
  int NumMatch = 0;

  // Reduce both arms first so their shapes are as simple as possible.
  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
  NumMatch += serialPatternMatch(TrueMBB);
  NumMatch += ifPatternMatch(TrueMBB);
  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
  NumMatch += serialPatternMatch(FalseMBB);
  NumMatch += ifPatternMatch(FalseMBB);
  MachineBasicBlock *LandBlk;
  int Cloned = 0;

  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
  // TODO: Simplify
  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
      && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
    // Diamond pattern
    LandBlk = *TrueMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
    // Triangle pattern, false is empty
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && *FalseMBB->succ_begin() == TrueMBB) {
    // Triangle pattern, true is empty
    // We reverse the predicate to make a triangle, empty false pattern;
    std::swap(TrueMBB, FalseMBB);
    reversePredicateSetter(MBB->end(), *MBB);
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
    LandBlk = *FalseMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1
             && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
    LandBlk = *TrueMBB->succ_begin();
  } else {
    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
  }

  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
  // new BB created for landBlk==NULL may introduce new challenge to the
  // reduction process.
  if (LandBlk &&
      ((TrueMBB && TrueMBB->pred_size() > 1)
       || (FalseMBB && FalseMBB->pred_size() > 1))) {
    Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
  }

  // Arms reachable from other predecessors must be cloned so the merge
  // below owns a private copy.
  if (TrueMBB && TrueMBB->pred_size() > 1) {
    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
    ++Cloned;
  }

  if (FalseMBB && FalseMBB->pred_size() > 1) {
    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
    ++Cloned;
  }

  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);

  ++numIfPatternMatch;

  numClonedBlock += Cloned;

  return 1 + Cloned + NumMatch;
}
1035 
1036 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1037  std::deque<MachineLoop *> NestedLoops;
1038  for (auto &It: *MLI)
1039  for (MachineLoop *ML : depth_first(It))
1040  NestedLoops.push_front(ML);
1041 
1042  if (NestedLoops.size() == 0)
1043  return 0;
1044 
1045  // Process nested loop outside->inside (we did push_front),
1046  // so "continue" to a outside loop won't be mistaken as "break"
1047  // of the current loop.
1048  int Num = 0;
1049  for (MachineLoop *ExaminedLoop : NestedLoops) {
1050  if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1051  continue;
1052  DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1053  int NumBreak = mergeLoop(ExaminedLoop);
1054  if (NumBreak == -1)
1055  break;
1056  Num += NumBreak;
1057  }
1058  return Num;
1059 }
1060 
1061 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1062  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1063  MBBVector ExitingMBBs;
1064  LoopRep->getExitingBlocks(ExitingMBBs);
1065  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1066  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
1067  // We assume a single ExitBlk
1068  MBBVector ExitBlks;
1069  LoopRep->getExitBlocks(ExitBlks);
1071  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1072  ExitBlkSet.insert(ExitBlks[i]);
1073  assert(ExitBlkSet.size() == 1);
1074  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1075  assert(ExitBlk && "Loop has several exit block");
1076  MBBVector LatchBlks;
1077  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
1078  InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
1079  PE = InvMBBTraits::child_end(LoopHeader);
1080  for (; PI != PE; PI++) {
1081  if (LoopRep->contains(*PI))
1082  LatchBlks.push_back(*PI);
1083  }
1084 
1085  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1086  mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1087  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1088  settleLoopcontBlock(LatchBlks[i], LoopHeader);
1089  int Match = 0;
1090  do {
1091  Match = 0;
1092  Match += serialPatternMatch(LoopHeader);
1093  Match += ifPatternMatch(LoopHeader);
1094  } while (Match > 0);
1095  mergeLooplandBlock(LoopHeader, ExitBlk);
1096  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1097  if (ParentLoop)
1098  MLI->changeLoopFor(LoopHeader, ParentLoop);
1099  else
1100  MLI->removeBlock(LoopHeader);
1101  Visited[LoopRep] = true;
1102  return 1;
1103 }
1104 
1105 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1106  MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1107  if (Src1MBB->succ_size() == 0) {
1108  MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1109  if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1110  MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1111  if (TheEntry) {
1112  DEBUG(
1113  dbgs() << "isLoopContBreakBlock yes src1 = BB"
1114  << Src1MBB->getNumber()
1115  << " src2 = BB" << Src2MBB->getNumber() << "\n";
1116  );
1117  return true;
1118  }
1119  }
1120  }
1121  return false;
1122 }
1123 
1124 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1125  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1126  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1127  if (Num == 0) {
1128  DEBUG(
1129  dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1130  );
1131  Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1132  }
1133  return Num;
1134 }
1135 
/// Worker for handleJumpintoIf: walk down the single-successor chain
/// starting at \p TrueMBB looking for a block that the false arm also
/// reaches on a single path; when found, clone the side entries on both
/// arms and re-run pattern matching on \p HeadMBB.  Returns the number
/// of patterns/clones produced, 0 if no join point was found.
int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
  int Num = 0;
  MachineBasicBlock *DownBlk;

  //trueBlk could be the common post dominator
  DownBlk = TrueMBB;

  DEBUG(
    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
           << " true = BB" << TrueMBB->getNumber()
           << ", numSucc=" << TrueMBB->succ_size()
           << " false = BB" << FalseMBB->getNumber() << "\n";
  );

  while (DownBlk) {
    DEBUG(
      dbgs() << "check down = BB" << DownBlk->getNumber();
    );

    // DownBlk is a candidate join point if the false arm reaches it on
    // a single path.
    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
      DEBUG(
        dbgs() << " working\n";
      );

      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);

      numClonedBlock += Num;
      Num += serialPatternMatch(*HeadMBB->succ_begin());
      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
      Num += ifPatternMatch(HeadMBB);
      assert(Num > 0);

      break;
    }
    DEBUG(
      dbgs() << " not working\n";
    );
    // Step to the unique successor, or stop if the chain branches/ends.
    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
  } // walk down the postDomTree

  return Num;
}
1180 
1181 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1182  MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1183  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1184  dbgs() << "head = BB" << HeadMBB->getNumber()
1185  << " size = " << HeadMBB->size();
1186  if (Detail) {
1187  dbgs() << "\n";
1188  HeadMBB->print(dbgs());
1189  dbgs() << "\n";
1190  }
1191 
1192  if (TrueMBB) {
1193  dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1194  << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1195  if (Detail) {
1196  dbgs() << "\n";
1197  TrueMBB->print(dbgs());
1198  dbgs() << "\n";
1199  }
1200  }
1201  if (FalseMBB) {
1202  dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1203  << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1204  if (Detail) {
1205  dbgs() << "\n";
1206  FalseMBB->print(dbgs());
1207  dbgs() << "\n";
1208  }
1209  }
1210  if (LandMBB) {
1211  dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1212  << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1213  if (Detail) {
1214  dbgs() << "\n";
1215  LandMBB->print(dbgs());
1216  dbgs() << "\n";
1217  }
1218  }
1219 
1220  dbgs() << "\n";
1221 }
1222 
/// Rewrite a jump-into-if region so both arms funnel through a common
/// landing block guarded by IF_PREDICATE_SET/ELSE/ENDIF.  May update
/// *LandMBBPtr.  Returns the number of new blocks created (0 when the
/// transform does not apply).  Paths that would require materializing a
/// new register after register allocation bail out with a fatal error.
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    MachineBasicBlock **LandMBBPtr) {
  bool MigrateTrue = false;
  bool MigrateFalse = false;

  MachineBasicBlock *LandBlk = *LandMBBPtr;

  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
         && (!FalseMBB || FalseMBB->succ_size() <= 1));

  if (TrueMBB == FalseMBB)
    return 0;

  MigrateTrue = needMigrateBlock(TrueMBB);
  MigrateFalse = needMigrateBlock(FalseMBB);

  if (!MigrateTrue && !MigrateFalse)
    return 0;

  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
  // have more than one predecessors.  without doing this, its predecessor
  // rather than headBlk will have undefined value in initReg.
  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
    MigrateTrue = true;
  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
    MigrateFalse = true;

  DEBUG(
    dbgs() << "before improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
  );

  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
  //
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
  //      {initReg = 0; org falseBlk branch }
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
  //      => org landBlk
  //      if landBlk->pred_size() > 2, put the about if-else inside
  //      if (initReg !=2) {...}
  //
  // add initReg = initVal to headBlk

  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  if (!MigrateTrue || !MigrateFalse) {
    // XXX: We have an opportunity here to optimize the "branch into if" case
    // here.  Branch into if looks like this:
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //             /      \           |
    // diamond_false        diamond_true
    //             \      /
    //               done
    //
    // The diamond_head block begins the "if" and the diamond_true block
    // is the block being "branched into".
    //
    // If MigrateTrue is true, then TrueBB is the block being "branched into"
    // and if MigrateFalse is true, then FalseBB is the block being
    // "branched into"
    //
    // Here is the pseudo code for how I think the optimization should work:
    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
    // 3. Move the branch instruction from diamond_head into its own basic
    //    block (new_block).
    // 4. Add an unconditional branch from diamond_head to new_block
    // 5. Replace the branch instruction in branch_from with an unconditional
    //    branch to new_block.  If branch_from has multiple predecessors, then
    //    we need to replace the True/False block in the branch
    //    instruction instead of replacing it.
    // 6. Change the condition of the branch instruction in new_block from
    //    COND to (COND || GPR0)
    //
    // In order insert these MOV instruction, we will need to use the
    // RegisterScavenger.  Usually liveness stops being tracked during
    // the late machine optimization passes, however if we implement
    // bool TargetRegisterInfo::requiresRegisterScavenging(
    //                                                const MachineFunction &MF)
    // and have it return true, liveness will be tracked correctly
    // by generic optimization passes.  We will also need to make sure that
    // all of our target-specific passes that run after regalloc and before
    // the CFGStructurizer track liveness and we will need to modify this pass
    // to correctly track liveness.
    //
    // After the above changes, the new CFG should look like this:
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //                       \     /
    //                      new_block
    //                      /      |
    //         diamond_false        diamond_true
    //                      \      /
    //                        done
    //
    // Without this optimization, we are forced to duplicate the diamond_true
    // block and we will end up with a CFG like this:
    //
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //             /      \                   |
    // diamond_false        diamond_true      diamond_true (duplicate)
    //             \      /                   |
    //               done --------------------|
    //
    // Duplicating diamond_true can be very costly especially if it has a
    // lot of instructions.
    return 0;
  }

  int NumNewBlk = 0;

  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);

  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);

  if (LandBlkHasOtherPred) {
    // report_fatal_error does not return; the code below it is kept for
    // reference but is unreachable.
    report_fatal_error("Extra register needed to handle CFG");
    unsigned CmpResReg =
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
    report_fatal_error("Extra compare instruction needed to handle CFG");
    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
        CmpResReg, DebugLoc());
  }

  // XXX: We are running this after RA, so creating virtual registers will
  // cause an assertion failure in the PostRA scheduling pass.
  unsigned InitReg =
    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
      DebugLoc());

  if (MigrateTrue) {
    migrateInstruction(TrueMBB, LandBlk, I);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 1).
    report_fatal_error("Extra register needed to handle CFG");
  }
  insertInstrBefore(I, AMDGPU::ELSE);

  if (MigrateFalse) {
    migrateInstruction(FalseMBB, LandBlk, I);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 0)
    report_fatal_error("Extra register needed to handle CFG");
  }

  if (LandBlkHasOtherPred) {
    // add endif
    insertInstrBefore(I, AMDGPU::ENDIF);

    // put initReg = 2 to other predecessors of landBlk
    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
         PE = LandBlk->pred_end(); PI != PE; ++PI) {
      MachineBasicBlock *MBB = *PI;
      if (MBB != TrueMBB && MBB != FalseMBB)
        report_fatal_error("Extra register needed to handle CFG");
    }
  }
  DEBUG(
    dbgs() << "result from improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
  );

  // update landBlk
  *LandMBBPtr = LandBlk;

  return NumNewBlk;
}
1399 
/// Collapse the serial pattern DstMBB -> SrcMBB: move SrcMBB's
/// instructions to the end of DstMBB, transfer SrcMBB's successors to
/// DstMBB, then detach and retire SrcMBB.
void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
    MachineBasicBlock *SrcMBB) {
  DEBUG(
    dbgs() << "serialPattern BB" << DstMBB->getNumber()
           << " <= BB" << SrcMBB->getNumber() << "\n";
  );
  // Move every instruction of SrcMBB into DstMBB.
  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());

  // DstMBB inherits SrcMBB's outgoing edges.
  DstMBB->removeSuccessor(SrcMBB, true);
  cloneSuccessorList(DstMBB, SrcMBB);

  // SrcMBB is now empty and unreachable; detach it and retire it.
  removeSuccessor(SrcMBB);
  MLI->removeBlock(SrcMBB);
  retireBlock(SrcMBB);
}
1415 
/// Merge a matched if-then-else region into \p MBB by rewriting the
/// conditional branch \p BranchMI into an IF (branch-on-nonzero
/// predicate) ... ELSE ... ENDIF sequence and splicing the arm blocks'
/// instructions into \p MBB.  The arm blocks are retired; \p TrueMBB
/// must be non-null, \p FalseMBB and \p LandMBB may be null.
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
  assert (TrueMBB);
  DEBUG(
    dbgs() << "ifPattern BB" << MBB->getNumber();
    dbgs() << "{  ";
    if (TrueMBB) {
      dbgs() << "BB" << TrueMBB->getNumber();
    }
    dbgs() << "  } else ";
    dbgs() << "{  ";
    if (FalseMBB) {
      dbgs() << "BB" << FalseMBB->getNumber();
    }
    dbgs() << "  }\n ";
    dbgs() << "landBlock: ";
    if (!LandMBB) {
      dbgs() << "NULL";
    } else {
      dbgs() << "BB" << LandMBB->getNumber();
    }
    dbgs() << "\n";
  );

  int OldOpcode = BranchMI->getOpcode();
  DebugLoc BranchDL = BranchMI->getDebugLoc();

//    transform to
//    if cond
//       trueBlk
//    else
//       falseBlk
//    endif
//    landBlk

  MachineBasicBlock::iterator I = BranchMI;
  // Open the structured IF right where the old branch was.
  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
      BranchDL);

  if (TrueMBB) {
    // Inline the true arm and drop its edges.
    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
    MBB->removeSuccessor(TrueMBB, true);
    if (LandMBB && TrueMBB->succ_size()!=0)
      TrueMBB->removeSuccessor(LandMBB, true);
    retireBlock(TrueMBB);
    MLI->removeBlock(TrueMBB);
  }

  if (FalseMBB) {
    // Inline the false arm under an ELSE.
    insertInstrBefore(I, AMDGPU::ELSE);
    MBB->splice(I, FalseMBB, FalseMBB->begin(),
                FalseMBB->end());
    MBB->removeSuccessor(FalseMBB, true);
    if (LandMBB && FalseMBB->succ_size() != 0)
      FalseMBB->removeSuccessor(LandMBB, true);
    retireBlock(FalseMBB);
    MLI->removeBlock(FalseMBB);
  }
  insertInstrBefore(I, AMDGPU::ENDIF);

  BranchMI->eraseFromParent();

  // Both arms consumed: MBB now flows straight to the landing block.
  if (LandMBB && TrueMBB && FalseMBB)
    MBB->addSuccessor(LandMBB);

}
1483 
/// Wrap the fully reduced loop body \p DstBlk in WHILELOOP/ENDLOOP and
/// redirect its self-edge to the loop's landing block \p LandMBB.
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
    MachineBasicBlock *LandMBB) {
  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
               << " land = BB" << LandMBB->getNumber() << "\n";);

  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
  // The back edge (DstBlk -> DstBlk) becomes the edge to the land block.
  DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
1493 
1494 
/// Rewrite the loop-exiting block \p ExitingMBB so that the exit to
/// \p LandMBB becomes a structured IF(PREDICATE) { BREAK } ENDIF,
/// reversing the predicate first when the branch exits on the false edge.
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
    MachineBasicBlock *LandMBB) {
  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
               << " land = BB" << LandMBB->getNumber() << "\n";);
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
  assert(BranchMI && isCondBranch(BranchMI));
  DebugLoc DL = BranchMI->getDebugLoc();
  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
  MachineBasicBlock::iterator I = BranchMI;
  // Make the exit the taken (true) side of the predicate.
  if (TrueBranch != LandMBB)
    reversePredicateSetter(I, *I->getParent());
  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
  insertInstrBefore(I, AMDGPU::BREAK);
  insertInstrBefore(I, AMDGPU::ENDIF);
  //now branchInst can be erase safely
  BranchMI->eraseFromParent();
  //now take care of successors, retire blocks
  ExitingMBB->removeSuccessor(LandMBB, true);
}
1514 
1515 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1516  MachineBasicBlock *ContMBB) {
1517  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1518  << ContingMBB->getNumber()
1519  << ", cont = BB" << ContMBB->getNumber() << "\n";);
1520 
1521  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1522  if (MI) {
1523  assert(isCondBranch(MI));
1525  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1526  int OldOpcode = MI->getOpcode();
1527  DebugLoc DL = MI->getDebugLoc();
1528 
1529  bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1530 
1531  if (!UseContinueLogical) {
1532  int BranchOpcode =
1533  TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1534  getBranchZeroOpcode(OldOpcode);
1535  insertCondBranchBefore(I, BranchOpcode, DL);
1536  // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1537  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1538  insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1539  } else {
1540  int BranchOpcode =
1541  TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1542  getContinueZeroOpcode(OldOpcode);
1543  insertCondBranchBefore(I, BranchOpcode, DL);
1544  }
1545 
1546  MI->eraseFromParent();
1547  } else {
1548  // if we've arrived here then we've already erased the branch instruction
1549  // travel back up the basic block to see the last reference of our debug
1550  // location we've just inserted that reference here so it should be
1551  // representative insertEnd to ensure phi-moves, if exist, go before the
1552  // continue-instr.
1553  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1554  getLastDebugLocInBB(ContingMBB));
1555  }
1556 }
1557 
1558 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1559  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1560  int Cloned = 0;
1561  assert(PreMBB->isSuccessor(SrcMBB));
1562  while (SrcMBB && SrcMBB != DstMBB) {
1563  assert(SrcMBB->succ_size() == 1);
1564  if (SrcMBB->pred_size() > 1) {
1565  SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1566  ++Cloned;
1567  }
1568 
1569  PreMBB = SrcMBB;
1570  SrcMBB = *SrcMBB->succ_begin();
1571  }
1572 
1573  return Cloned;
1574 }
1575 
1577 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1578  MachineBasicBlock *PredMBB) {
1579  assert(PredMBB->isSuccessor(MBB) &&
1580  "succBlk is not a prececessor of curBlk");
1581 
1582  MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
1583  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1584  //srcBlk, oldBlk, newBlk
1585 
1586  PredMBB->replaceSuccessor(MBB, CloneMBB);
1587 
1588  // add all successor to cloneBlk
1589  cloneSuccessorList(CloneMBB, MBB);
1590 
1591  numClonedInstr += MBB->size();
1592 
1593  DEBUG(
1594  dbgs() << "Cloned block: " << "BB"
1595  << MBB->getNumber() << "size " << MBB->size() << "\n";
1596  );
1597 
1598  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1599 
1600  return CloneMBB;
1601 }
1602 
1603 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1605  MachineBasicBlock::iterator SpliceEnd;
1606  //look for the input branchinstr, not the AMDGPU branchinstr
1607  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1608  if (!BranchMI) {
1609  DEBUG(
1610  dbgs() << "migrateInstruction don't see branch instr\n" ;
1611  );
1612  SpliceEnd = SrcMBB->end();
1613  } else {
1614  DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1615  SpliceEnd = BranchMI;
1616  }
1617  DEBUG(
1618  dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
1619  << "srcSize = " << SrcMBB->size() << "\n";
1620  );
1621 
1622  //splice insert before insertPos
1623  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1624 
1625  DEBUG(
1626  dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
1627  << "srcSize = " << SrcMBB->size() << '\n';
1628  );
1629 }
1630 
1632 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1633  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1634  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1635 
1636  if (!LoopHeader || !LoopLatch)
1637  return nullptr;
1638  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1639  // Is LoopRep an infinite loop ?
1640  if (!BranchMI || !isUncondBranch(BranchMI))
1641  return nullptr;
1642 
1643  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1644  FuncRep->push_back(DummyExitBlk); //insert to function
1645  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1646  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1647  LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
1648  Ctx.emitError("Extra register needed to handle CFG");
1649  return nullptr;
1650 }
1651 
1652 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1653  MachineInstr *BranchMI;
1654 
1655  // I saw two unconditional branch in one basic block in example
1656  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1657  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1658  && isUncondBranch(BranchMI)) {
1659  DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1660  BranchMI->eraseFromParent();
1661  }
1662 }
1663 
1664 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1665  MachineBasicBlock *MBB) {
1666  if (MBB->succ_size() != 2)
1667  return;
1668  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1669  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1670  if (MBB1 != MBB2)
1671  return;
1672 
1673  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1674  assert(BranchMI && isCondBranch(BranchMI));
1675  DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1676  BranchMI->eraseFromParent();
1677  SHOWNEWBLK(MBB1, "Removing redundant successor");
1678  MBB->removeSuccessor(MBB1, true);
1679 }
1680 
1681 void AMDGPUCFGStructurizer::addDummyExitBlock(
1683  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1684  FuncRep->push_back(DummyExitBlk); //insert to function
1685  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
1686 
1688  E = RetMBB.end(); It != E; ++It) {
1689  MachineBasicBlock *MBB = *It;
1690  MachineInstr *MI = getReturnInstr(MBB);
1691  if (MI)
1692  MI->eraseFromParent();
1693  MBB->addSuccessor(DummyExitBlk);
1694  DEBUG(
1695  dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1696  << " successors\n";
1697  );
1698  }
1699  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1700 }
1701 
1702 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1703  while (MBB->succ_size())
1704  MBB->removeSuccessor(*MBB->succ_begin());
1705 }
1706 
1707 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1708  int SccNum) {
1709  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1710  if (!srcBlkInfo)
1711  srcBlkInfo = new BlockInformation();
1712  srcBlkInfo->SccNum = SccNum;
1713 }
1714 
1715 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1716  DEBUG(
1717  dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
1718  );
1719 
1720  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1721 
1722  if (!SrcBlkInfo)
1723  SrcBlkInfo = new BlockInformation();
1724 
1725  SrcBlkInfo->IsRetired = true;
1726  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1727  && "can't retire block yet");
1728 }
1729 
// Pass identification: the address of ID uniquely identifies this pass.
char AMDGPUCFGStructurizer::ID = 0;
1731 
1732 } // end anonymous namespace
1733 
1734 
1735 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1736  "AMDGPU CFG Structurizer", false, false)
1740 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1741  "AMDGPU CFG Structurizer", false, false)
1742 
1744  return new AMDGPUCFGStructurizer();
1745 }
unsigned succ_size() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:226
AMDGPU specific subclass of TargetSubtarget.
STATISTIC(NumFunctions,"Total number of functions")
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
size_t i
Interface definition for R600InstrInfo.
MachineBasicBlock * getMBB() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
AMDGPU CFG Structurizer
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
A debug info location.
Definition: DebugLoc.h:34
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SmallVector< MachineBasicBlock *, 4 > MBBVector
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
BlockT * getHeader() const
Definition: LoopInfo.h:102
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define SHOWNEWINSTR(i)
static F t[256]
void initializeAMDGPUCFGStructurizerPass(PassRegistry &)
iterator begin() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:36
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer,"amdgpustructurizer","AMDGPU CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUCFGStructurizer
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:65
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:226
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
const HexagonRegisterInfo & getRegisterInfo() const
HexagonInstrInfo specifics.
MachineBasicBlock * MBB
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
std::vector< MachineBasicBlock * >::iterator pred_iterator
reverse_iterator rend()
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:131
#define INVALIDSCCNUM
size_type size() const
Definition: SmallPtrSet.h:99
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
#define SHOWNEWBLK(b, msg)
AMDGPU CFG false
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
bool isEmpty() const
isEmpty - Return true if there are no jump tables.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
CloneMachineInstr - Create a new MachineInstr which is a copy of the 'Orig' instruction, identical in all ways except the instruction has no parent, prev, or next.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Representation of each machine instruction.
Definition: MachineInstr.h:52
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
Flatten the CFG
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator_range< df_iterator< T > > depth_first(const T &G)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void push_back(MachineBasicBlock *MBB)
#define DEBUG(X)
Definition: Debug.h:100
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
static uint32_t blk(uint32_t *Buf, int I)
Definition: SHA1.cpp:36
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:40
iterator end() const
bool isValid() const
Check for null.
std::vector< LoopT * >::const_iterator iterator
Definition: LoopInfo.h:129
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
The iterator interface to the top-level loops in the current function.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned pred_size() const
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:43