AMDILCFGStructurizer.cpp
1 //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDGPUSubtarget.h"
12 #include "R600InstrInfo.h"
13 #include "R600RegisterInfo.h"
14 #include "llvm/ADT/DepthFirstIterator.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachinePostDominators.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/MachineValueType.h"
32 #include "llvm/IR/DebugLoc.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <cassert>
39 #include <cstddef>
40 #include <deque>
41 #include <iterator>
42 #include <map>
43 #include <utility>
44 #include <vector>
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "structcfg"
49 
50 #define DEFAULT_VEC_SLOTS 8
51 
52 // TODO: move-begin.
53 
54 //===----------------------------------------------------------------------===//
55 //
56 // Statistics for CFGStructurizer.
57 //
58 //===----------------------------------------------------------------------===//
59 
60 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
61  "matched");
62 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
63  "matched");
64 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
65 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
66 
67 namespace llvm {
68 
69 void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
70 
71 } // end namespace llvm
72 
73 namespace {
74 
75 //===----------------------------------------------------------------------===//
76 //
77 // Miscellaneous utility for CFGStructurizer.
78 //
79 //===----------------------------------------------------------------------===//
80 
81 #define SHOWNEWINSTR(i) \
82  DEBUG(dbgs() << "New instr: " << *i << "\n");
83 
84 #define SHOWNEWBLK(b, msg) \
85 DEBUG( \
86  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
87  dbgs() << "\n"; \
88 );
89 
90 #define SHOWBLK_DETAIL(b, msg) \
91 DEBUG( \
92  if (b) { \
93  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
94  b->print(dbgs()); \
95  dbgs() << "\n"; \
96  } \
97 );
98 
99 #define INVALIDSCCNUM -1
100 
101 //===----------------------------------------------------------------------===//
102 //
103 // supporting data structure for CFGStructurizer
104 //
105 //===----------------------------------------------------------------------===//
106 
107 class BlockInformation {
108 public:
109  bool IsRetired = false;
110  int SccNum = INVALIDSCCNUM;
111 
112  BlockInformation() = default;
113 };
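// Note (added commentary, not in the upstream source): BlockInformation is a
// small per-MBB record filled in lazily through BlockInfoMap. recordSccnum()
// stores the SCC index assigned by orderBlocks(), and retireBlock() flips
// IsRetired once a block has been merged into a predecessor and can be erased
// during the final cleanup in run().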
114 
115 //===----------------------------------------------------------------------===//
116 //
117 // CFGStructurizer
118 //
119 //===----------------------------------------------------------------------===//
120 
121 class AMDGPUCFGStructurizer : public MachineFunctionPass {
122 public:
123  using MBBVector = SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS>;
124  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
125  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
126 
127  enum PathToKind {
128  Not_SinglePath = 0,
129  SinglePath_InPath = 1,
130  SinglePath_NotInPath = 2
131  };
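// Added commentary: these values are produced by singlePathTo(). Walking the
// unique-successor chain from SrcMBB,
//   SinglePath_InPath    - DstMBB is reached on that chain,
//   SinglePath_NotInPath - the chain ends (no successors) without seeing DstMBB,
//   Not_SinglePath       - a block on the chain has more than one successor, or
//                          a side entry is found while AllowSideEntry is false.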
132 
133  static char ID;
134 
135  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
136  initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
137  }
138 
139  StringRef getPassName() const override {
140  return "AMDGPU Control Flow Graph structurizer Pass";
141  }
142 
143  void getAnalysisUsage(AnalysisUsage &AU) const override {
144  AU.addRequired<MachineDominatorTree>();
145  AU.addRequired<MachinePostDominatorTree>();
146  AU.addRequired<MachineLoopInfo>();
147  MachineFunctionPass::getAnalysisUsage(AU);
148  }
149 
150  /// Perform the CFG structurization
151  bool run();
152 
153  /// Perform the CFG preparation
154  /// This step will remove every unconditional/dead jump instruction and make
155  /// sure all loops have an exit block
156  bool prepare();
157 
158  bool runOnMachineFunction(MachineFunction &MF) override {
159  TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
160  TRI = &TII->getRegisterInfo();
161  DEBUG(MF.dump(););
162  OrderedBlks.clear();
163  Visited.clear();
164  FuncRep = &MF;
165  MLI = &getAnalysis<MachineLoopInfo>();
166  DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
167  MDT = &getAnalysis<MachineDominatorTree>();
168  DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
169  PDT = &getAnalysis<MachinePostDominatorTree>();
170  DEBUG(PDT->print(dbgs()););
171  prepare();
172  run();
173  DEBUG(MF.dump(););
174  return true;
175  }
176 
177 protected:
178  MachineDominatorTree *MDT;
179  MachinePostDominatorTree *PDT;
180  MachineLoopInfo *MLI;
181  const R600InstrInfo *TII = nullptr;
182  const R600RegisterInfo *TRI = nullptr;
183 
184  // PRINT FUNCTIONS
185  /// Print the ordered Blocks.
186  void printOrderedBlocks() const {
187  size_t i = 0;
188  for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
189  iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
190  dbgs() << "BB" << (*iterBlk)->getNumber();
191  dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
192  if (i != 0 && i % 10 == 0) {
193  dbgs() << "\n";
194  } else {
195  dbgs() << " ";
196  }
197  }
198  }
199 
200  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
201  for (MachineLoop::iterator iter = LoopInfo.begin(),
202  iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
203  (*iter)->print(dbgs(), 0);
204  }
205  }
206 
207  // UTILITY FUNCTIONS
208  int getSCCNum(MachineBasicBlock *MBB) const;
209  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
210  bool hasBackEdge(MachineBasicBlock *MBB) const;
211  bool isRetiredBlock(MachineBasicBlock *MBB) const;
212  bool isActiveLoophead(MachineBasicBlock *MBB) const;
213  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
214  bool AllowSideEntry = true) const;
215  int countActiveBlock(MBBVector::const_iterator It,
216  MBBVector::const_iterator E) const;
217  bool needMigrateBlock(MachineBasicBlock *MBB) const;
218 
219  // Utility Functions
220  void reversePredicateSetter(MachineBasicBlock::iterator I,
221  MachineBasicBlock &MBB);
222  /// Compute the reversed DFS post order of Blocks
223  void orderBlocks(MachineFunction *MF);
224 
225  // Function originally from CFGStructTraits
226  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
227  const DebugLoc &DL = DebugLoc());
228  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
229  const DebugLoc &DL = DebugLoc());
230  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
231  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
232  const DebugLoc &DL);
233  void insertCondBranchBefore(MachineBasicBlock *MBB,
234  MachineBasicBlock::iterator I, int NewOpcode,
235  int RegNum, const DebugLoc &DL);
236 
237  static int getBranchNzeroOpcode(int OldOpcode);
238  static int getBranchZeroOpcode(int OldOpcode);
239  static int getContinueNzeroOpcode(int OldOpcode);
240  static int getContinueZeroOpcode(int OldOpcode);
241  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
242  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
243  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
244  MachineInstr *MI);
245  static bool isCondBranch(MachineInstr *MI);
246  static bool isUncondBranch(MachineInstr *MI);
247  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
248  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
249 
250  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
251  ///
252  /// A BB with a backward edge could have move instructions after the branch
253  /// instruction. Such move instructions "belong to" the loop's backward edge.
254  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
255 
256  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
257  static bool isReturnBlock(MachineBasicBlock *MBB);
258  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
259  MachineBasicBlock *SrcMBB);
260  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
261 
262  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
263  /// because the AMDGPU branch instruction is not recognized as a terminator.
264  /// Fix this and retire this routine.
265  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
266  MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
267 
268  static void wrapup(MachineBasicBlock *MBB);
269 
270  int patternMatch(MachineBasicBlock *MBB);
271  int patternMatchGroup(MachineBasicBlock *MBB);
272  int serialPatternMatch(MachineBasicBlock *MBB);
273  int ifPatternMatch(MachineBasicBlock *MBB);
274  int loopendPatternMatch();
275  int mergeLoop(MachineLoop *LoopRep);
276 
277  /// Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
278  /// the same loop with LoopLandInfo. Without explicitly keeping track of
279  /// loopContBlks and loopBreakBlks, this is a way to get that information.
280  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
281  MachineBasicBlock *Src2MBB);
282  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
283  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
284  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
285  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
286  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
287  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
288  MachineBasicBlock **LandMBBPtr);
289  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
290  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
291  MachineBasicBlock *LandMBB, bool Detail = false);
292  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
293  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
294  void mergeSerialBlock(MachineBasicBlock *DstMBB,
295  MachineBasicBlock *SrcMBB);
296 
297  void mergeIfthenelseBlock(MachineInstr *BranchMI,
298  MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
299  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
300  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
301  MachineBasicBlock *LandMBB);
302  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
303  MachineBasicBlock *LandMBB);
304  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
305  MachineBasicBlock *ContMBB);
306 
307  /// normalizeInfiniteLoopExit changes
308  /// B1:
309  /// uncond_br LoopHeader
310  ///
311  /// to
312  /// B1:
313  /// cond_br 1 LoopHeader dummyExit
314  /// and return the newly added dummy exit block
315  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
316  void removeUnconditionalBranch(MachineBasicBlock *MBB);
317 
318  /// Remove duplicate branches instructions in a block.
319  /// For instance
320  /// B0:
321  /// cond_br X B1 B2
322  /// cond_br X B1 B2
323  /// is transformed to
324  /// B0:
325  /// cond_br X B1 B2
326  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
327 
328  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
329  void removeSuccessor(MachineBasicBlock *MBB);
330  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
331  MachineBasicBlock *PredMBB);
332  void migrateInstruction(MachineBasicBlock *SrcMBB,
333  MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
334  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
335  void retireBlock(MachineBasicBlock *MBB);
336 
337 private:
338  MBBInfoMap BlockInfoMap;
339  LoopLandInfoMap LLInfoMap;
340  std::map<MachineLoop *, bool> Visited;
341  MachineFunction *FuncRep;
342  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
343 };
344 
345 } // end anonymous namespace
346 
347 char AMDGPUCFGStructurizer::ID = 0;
348 
349 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
350  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
351  if (It == BlockInfoMap.end())
352  return INVALIDSCCNUM;
353  return (*It).second->SccNum;
354 }
355 
356 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
357  const {
358  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
359  if (It == LLInfoMap.end())
360  return nullptr;
361  return (*It).second;
362 }
363 
364 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
365  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
366  if (!LoopRep)
367  return false;
368  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
369  return MBB->isSuccessor(LoopHeader);
370 }
371 
372 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
373  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
374  if (It == BlockInfoMap.end())
375  return false;
376  return (*It).second->IsRetired;
377 }
378 
379 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
380  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
381  while (LoopRep && LoopRep->getHeader() == MBB) {
382  MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
383  if(!LoopLand)
384  return true;
385  if (!isRetiredBlock(LoopLand))
386  return true;
387  LoopRep = LoopRep->getParentLoop();
388  }
389  return false;
390 }
391 
392 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
393  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
394  bool AllowSideEntry) const {
395  assert(DstMBB);
396  if (SrcMBB == DstMBB)
397  return SinglePath_InPath;
398  while (SrcMBB && SrcMBB->succ_size() == 1) {
399  SrcMBB = *SrcMBB->succ_begin();
400  if (SrcMBB == DstMBB)
401  return SinglePath_InPath;
402  if (!AllowSideEntry && SrcMBB->pred_size() > 1)
403  return Not_SinglePath;
404  }
405  if (SrcMBB && SrcMBB->succ_size()==0)
406  return SinglePath_NotInPath;
407  return Not_SinglePath;
408 }
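// Illustration (not from the original source): for the chain
//   Src -> A -> B -> Dst
// where A and B each have exactly one successor, singlePathTo(Src, Dst)
// returns SinglePath_InPath; if B instead ended in a block with no successors
// without ever reaching Dst, the result would be SinglePath_NotInPath.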
409 
410 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
411  MBBVector::const_iterator E) const {
412  int Count = 0;
413  while (It != E) {
414  if (!isRetiredBlock(*It))
415  ++Count;
416  ++It;
417  }
418  return Count;
419 }
420 
421 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
422  unsigned BlockSizeThreshold = 30;
423  unsigned CloneInstrThreshold = 100;
424  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
425 
426  if(!MultiplePreds)
427  return false;
428  unsigned BlkSize = MBB->size();
429  return ((BlkSize > BlockSizeThreshold) &&
430  (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
431 }
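// Worked example (added commentary): a block with 40 instructions and 3
// predecessors gives BlkSize * (pred_size() - 1) = 40 * 2 = 80, which stays
// under CloneInstrThreshold (100), so it is not migrated; at 60 instructions
// the product is 120 and needMigrateBlock() returns true.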
432 
433 void AMDGPUCFGStructurizer::reversePredicateSetter(
434  MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
435  assert(I.isValid() && "Expected valid iterator");
436  for (;; --I) {
437  if (I == MBB.end())
438  continue;
439  if (I->getOpcode() == AMDGPU::PRED_X) {
440  switch (I->getOperand(2).getImm()) {
441  case AMDGPU::PRED_SETE_INT:
442  I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
443  return;
444  case AMDGPU::PRED_SETNE_INT:
445  I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
446  return;
447  case AMDGPU::PRED_SETE:
448  I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
449  return;
450  case AMDGPU::PRED_SETNE:
451  I->getOperand(2).setImm(AMDGPU::PRED_SETE);
452  return;
453  default:
454  llvm_unreachable("PRED_X Opcode invalid!");
455  }
456  }
457  }
458 }
459 
460 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
461  int NewOpcode, const DebugLoc &DL) {
462  MachineInstr *MI =
463  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
464  MBB->push_back(MI);
465  //assume the instruction doesn't take any reg operand ...
466  SHOWNEWINSTR(MI);
467 }
468 
469 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
470  int NewOpcode,
471  const DebugLoc &DL) {
472  MachineInstr *MI =
473  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
474  if (MBB->begin() != MBB->end())
475  MBB->insert(MBB->begin(), MI);
476  else
477  MBB->push_back(MI);
478  SHOWNEWINSTR(MI);
479  return MI;
480 }
481 
482 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
483  MachineBasicBlock::iterator I, int NewOpcode) {
484  MachineInstr *OldMI = &(*I);
485  MachineBasicBlock *MBB = OldMI->getParent();
486  MachineInstr *NewMBB =
487  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
488  MBB->insert(I, NewMBB);
489  //assume the instruction doesn't take any reg operand ...
490  SHOWNEWINSTR(NewMBB);
491  return NewMBB;
492 }
493 
494 void AMDGPUCFGStructurizer::insertCondBranchBefore(
495  MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
496  MachineInstr *OldMI = &(*I);
497  MachineBasicBlock *MBB = OldMI->getParent();
498  MachineFunction *MF = MBB->getParent();
499  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
500  MBB->insert(I, NewMI);
501  MachineInstrBuilder MIB(*MF, NewMI);
502  MIB.addReg(OldMI->getOperand(1).getReg(), false);
503  SHOWNEWINSTR(NewMI);
504  //erase later oldInstr->eraseFromParent();
505 }
506 
507 void AMDGPUCFGStructurizer::insertCondBranchBefore(
508  MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
509  int RegNum, const DebugLoc &DL) {
510  MachineFunction *MF = blk->getParent();
511  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
512  //insert before
513  blk->insert(I, NewInstr);
514  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
515  SHOWNEWINSTR(NewInstr);
516 }
517 
518 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
519  switch(OldOpcode) {
520  case AMDGPU::JUMP_COND:
521  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
522  case AMDGPU::BRANCH_COND_i32:
523  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
524  default: llvm_unreachable("internal error");
525  }
526  return -1;
527 }
528 
529 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
530  switch(OldOpcode) {
531  case AMDGPU::JUMP_COND:
532  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
533  case AMDGPU::BRANCH_COND_i32:
534  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
535  default: llvm_unreachable("internal error");
536  }
537  return -1;
538 }
539 
540 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
541  switch(OldOpcode) {
542  case AMDGPU::JUMP_COND:
543  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
544  default: llvm_unreachable("internal error");
545  }
546  return -1;
547 }
548 
549 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
550  switch(OldOpcode) {
551  case AMDGPU::JUMP_COND:
552  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
553  default: llvm_unreachable("internal error");
554  }
555  return -1;
556 }
557 
558 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
559  return MI->getOperand(0).getMBB();
560 }
561 
562 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
563  MachineBasicBlock *MBB) {
564  MI->getOperand(0).setMBB(MBB);
565 }
566 
567 MachineBasicBlock *
568 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
569  MachineInstr *MI) {
570  assert(MBB->succ_size() == 2);
571  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
572  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
573  MachineBasicBlock::succ_iterator Next = It;
574  ++Next;
575  return (*It == TrueBranch) ? *Next : *It;
576 }
577 
578 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
579  switch (MI->getOpcode()) {
580  case AMDGPU::JUMP_COND:
581  case AMDGPU::BRANCH_COND_i32:
582  case AMDGPU::BRANCH_COND_f32: return true;
583  default:
584  return false;
585  }
586  return false;
587 }
588 
589 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
590  switch (MI->getOpcode()) {
591  case AMDGPU::JUMP:
592  case AMDGPU::BRANCH:
593  return true;
594  default:
595  return false;
596  }
597  return false;
598 }
599 
600 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
601  //get DebugLoc from the last MachineBasicBlock instruction with debug info
602  DebugLoc DL;
603  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
604  ++It) {
605  MachineInstr *instr = &(*It);
606  if (instr->getDebugLoc())
607  DL = instr->getDebugLoc();
608  }
609  return DL;
610 }
611 
612 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
613  MachineBasicBlock *MBB) {
614  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
615  MachineInstr *MI = &*It;
616  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
617  return MI;
618  return nullptr;
619 }
620 
621 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
622  MachineBasicBlock *MBB) {
623  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
624  It != E; ++It) {
625  // FIXME: Simplify
626  MachineInstr *MI = &*It;
627  if (MI) {
628  if (isCondBranch(MI) || isUncondBranch(MI))
629  return MI;
630  else if (!TII->isMov(MI->getOpcode()))
631  break;
632  }
633  }
634  return nullptr;
635 }
636 
637 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
638  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
639  if (It != MBB->rend()) {
640  MachineInstr *instr = &(*It);
641  if (instr->getOpcode() == AMDGPU::RETURN)
642  return instr;
643  }
644  return nullptr;
645 }
646 
647 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
648  MachineInstr *MI = getReturnInstr(MBB);
649  bool IsReturn = (MBB->succ_size() == 0);
650  if (MI)
651  assert(IsReturn);
652  else if (IsReturn)
653  DEBUG(
654  dbgs() << "BB" << MBB->getNumber()
655  <<" is return block without RETURN instr\n";);
656  return IsReturn;
657 }
658 
659 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
660  MachineBasicBlock *SrcMBB) {
661  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
662  iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
663  DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
664 }
665 
666 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
667  MachineFunction *Func = MBB->getParent();
668  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
669  Func->push_back(NewMBB); //insert to function
670  for (const MachineInstr &It : *MBB)
671  NewMBB->push_back(Func->CloneMachineInstr(&It));
672  return NewMBB;
673 }
674 
675 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
676  MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
677  MachineBasicBlock *NewBlk) {
678  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
679  if (BranchMI && isCondBranch(BranchMI) &&
680  getTrueBranch(BranchMI) == OldMBB)
681  setTrueBranch(BranchMI, NewBlk);
682 }
683 
684 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
685  assert((!MBB->getParent()->getJumpTableInfo()
686  || MBB->getParent()->getJumpTableInfo()->isEmpty())
687  && "found a jump table");
688 
689  //collect continue right before endloop
690  SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
691  MachineBasicBlock::iterator Pre = MBB->begin();
692  MachineBasicBlock::iterator E = MBB->end();
693  MachineBasicBlock::iterator It = Pre;
694  while (It != E) {
695  if (Pre->getOpcode() == AMDGPU::CONTINUE
696  && It->getOpcode() == AMDGPU::ENDLOOP)
697  ContInstr.push_back(&*Pre);
698  Pre = It;
699  ++It;
700  }
701 
702  //delete continue right before endloop
703  for (unsigned i = 0; i < ContInstr.size(); ++i)
704  ContInstr[i]->eraseFromParent();
705 
706  // TODO to fix up jump table so later phase won't be confused. if
707  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
708  // there isn't such an interface yet. alternatively, replace all the other
709  // blocks in the jump table with the entryBlk //}
710 }
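// Example of the cleanup above (added commentary): a loop tail of
//   ...; CONTINUE; ENDLOOP
// is rewritten to
//   ...; ENDLOOP
// since a CONTINUE immediately before ENDLOOP is redundant: falling through to
// ENDLOOP re-enters the loop anyway.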
711 
712 bool AMDGPUCFGStructurizer::prepare() {
713  bool Changed = false;
714 
715  //FIXME: if not reducible flow graph, make it so ???
716 
717  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
718 
719  orderBlocks(FuncRep);
720 
722 
723  // Add an ExitBlk to loop that don't have one
724  for (MachineLoopInfo::iterator It = MLI->begin(),
725  E = MLI->end(); It != E; ++It) {
726  MachineLoop *LoopRep = (*It);
727  MBBVector ExitingMBBs;
728  LoopRep->getExitingBlocks(ExitingMBBs);
729 
730  if (ExitingMBBs.size() == 0) {
731  MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
732  if (DummyExitBlk)
733  RetBlks.push_back(DummyExitBlk);
734  }
735  }
736 
737  // Remove unconditional branch instr.
738  // Add dummy exit block iff there are multiple returns.
739  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
740  It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
741  MachineBasicBlock *MBB = *It;
742  removeUnconditionalBranch(MBB);
743  removeRedundantConditionalBranch(MBB);
744  if (isReturnBlock(MBB)) {
745  RetBlks.push_back(MBB);
746  }
747  assert(MBB->succ_size() <= 2);
748  }
749 
750  if (RetBlks.size() >= 2) {
751  addDummyExitBlock(RetBlks);
752  Changed = true;
753  }
754 
755  return Changed;
756 }
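// Added summary of prepare(): blocks are ordered by SCC, infinite loops get a
// dummy exit block via normalizeInfiniteLoopExit(), unconditional and
// duplicated conditional branches are stripped, and if the function ends up
// with two or more return blocks they are funneled into a single dummy exit
// block so the structurizer sees exactly one exit.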
757 
758 bool AMDGPUCFGStructurizer::run() {
759  //Assume reducible CFG...
760  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
761 
762 #ifdef STRESSTEST
763  //Use the worse block ordering to test the algorithm.
764  ReverseVector(orderedBlks);
765 #endif
766 
767  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
768  int NumIter = 0;
769  bool Finish = false;
770  MachineBasicBlock *MBB;
771  bool MakeProgress = false;
772  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
773  OrderedBlks.end());
774 
775  do {
776  ++NumIter;
777  DEBUG(
778  dbgs() << "numIter = " << NumIter
779  << ", numRemainedBlk = " << NumRemainedBlk << "\n";
780  );
781 
782  SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
783  OrderedBlks.begin();
784  SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
785  OrderedBlks.end();
786 
787  SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
788  It;
789  MachineBasicBlock *SccBeginMBB = nullptr;
790  int SccNumBlk = 0; // The number of active blocks, init to a
791  // maximum possible number.
792  int SccNumIter; // Number of iteration in this SCC.
793 
794  while (It != E) {
795  MBB = *It;
796 
797  if (!SccBeginMBB) {
798  SccBeginIter = It;
799  SccBeginMBB = MBB;
800  SccNumIter = 0;
801  SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
802  DEBUG(
803  dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
804  dbgs() << "\n";
805  );
806  }
807 
808  if (!isRetiredBlock(MBB))
809  patternMatch(MBB);
810 
811  ++It;
812 
813  bool ContNextScc = true;
814  if (It == E
815  || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
816  // Just finish one scc.
817  ++SccNumIter;
818  int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
819  if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
820  DEBUG(
821  dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
822  << ", sccNumIter = " << SccNumIter;
823  dbgs() << "doesn't make any progress\n";
824  );
825  ContNextScc = true;
826  } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
827  SccNumBlk = sccRemainedNumBlk;
828  It = SccBeginIter;
829  ContNextScc = false;
830  DEBUG(
831  dbgs() << "repeat processing SCC" << getSCCNum(MBB)
832  << "sccNumIter = " << SccNumIter << '\n';
833  );
834  } else {
835  // Finish the current scc.
836  ContNextScc = true;
837  }
838  } else {
839  // Continue on next component in the current scc.
840  ContNextScc = false;
841  }
842 
843  if (ContNextScc)
844  SccBeginMBB = nullptr;
845  } //while, "one iteration" over the function.
846 
847  MachineBasicBlock *EntryMBB =
848  *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
849  if (EntryMBB->succ_size() == 0) {
850  Finish = true;
851  DEBUG(
852  dbgs() << "Reduce to one block\n";
853  );
854  } else {
855  int NewnumRemainedBlk
856  = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
857  // consider cloned blocks ??
858  if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
859  MakeProgress = true;
860  NumRemainedBlk = NewnumRemainedBlk;
861  } else {
862  MakeProgress = false;
863  DEBUG(
864  dbgs() << "No progress\n";
865  );
866  }
867  }
868  } while (!Finish && MakeProgress);
869 
870  // Misc wrap up to maintain the consistency of the Function representation.
871  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
872 
873  // Detach retired Block, release memory.
874  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
875  It != E; ++It) {
876  if ((*It).second && (*It).second->IsRetired) {
877  assert(((*It).first)->getNumber() != -1);
878  DEBUG(
879  dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
880  );
881  (*It).first->eraseFromParent(); //Remove from the parent Function.
882  }
883  delete (*It).second;
884  }
885  BlockInfoMap.clear();
886  LLInfoMap.clear();
887 
888  if (!Finish) {
889  DEBUG(FuncRep->viewCFG());
890  report_fatal_error("IRREDUCIBLE_CFG");
891  }
892 
893  return true;
894 }
895 
896 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
897  int SccNum = 0;
898  MachineBasicBlock *MBB;
899  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
900  ++It, ++SccNum) {
901  const std::vector<MachineBasicBlock *> &SccNext = *It;
902  for (std::vector<MachineBasicBlock *>::const_iterator
903  blockIter = SccNext.begin(), blockEnd = SccNext.end();
904  blockIter != blockEnd; ++blockIter) {
905  MBB = *blockIter;
906  OrderedBlks.push_back(MBB);
907  recordSccnum(MBB, SccNum);
908  }
909  }
910 
911  // walk through all the blocks in func to check for unreachable
912  for (auto *MBB : nodes(MF)) {
913  SccNum = getSCCNum(MBB);
914  if (SccNum == INVALIDSCCNUM)
915  dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
916  }
917 }
918 
919 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
920  int NumMatch = 0;
921  int CurMatch;
922 
923  DEBUG(
924  dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
925  );
926 
927  while ((CurMatch = patternMatchGroup(MBB)) > 0)
928  NumMatch += CurMatch;
929 
930  DEBUG(
931  dbgs() << "End patternMatch BB" << MBB->getNumber()
932  << ", numMatch = " << NumMatch << "\n";
933  );
934 
935  return NumMatch;
936 }
937 
938 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
939  int NumMatch = 0;
940  NumMatch += loopendPatternMatch();
941  NumMatch += serialPatternMatch(MBB);
942  NumMatch += ifPatternMatch(MBB);
943  return NumMatch;
944 }
945 
946 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
947  if (MBB->succ_size() != 1)
948  return 0;
949 
950  MachineBasicBlock *childBlk = *MBB->succ_begin();
951  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
952  return 0;
953 
954  mergeSerialBlock(MBB, childBlk);
955  ++numSerialPatternMatch;
956  return 1;
957 }
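// Illustration (added commentary): the serial pattern collapses a straight
// line of blocks. Given
//   BB0 -> BB1        (BB0 has one successor, BB1 has one predecessor)
// mergeSerialBlock() splices BB1's instructions onto the end of BB0, BB0
// inherits BB1's successors, and BB1 is retired.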
958 
959 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
960  //two edges
961  if (MBB->succ_size() != 2)
962  return 0;
963  if (hasBackEdge(MBB))
964  return 0;
965  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
966  if (!BranchMI)
967  return 0;
968 
969  assert(isCondBranch(BranchMI));
970  int NumMatch = 0;
971 
972  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
973  NumMatch += serialPatternMatch(TrueMBB);
974  NumMatch += ifPatternMatch(TrueMBB);
975  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
976  NumMatch += serialPatternMatch(FalseMBB);
977  NumMatch += ifPatternMatch(FalseMBB);
978  MachineBasicBlock *LandBlk;
979  int Cloned = 0;
980 
981  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
982  // TODO: Simplify
983  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
984  && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
985  // Diamond pattern
986  LandBlk = *TrueMBB->succ_begin();
987  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
988  // Triangle pattern, false is empty
989  LandBlk = FalseMBB;
990  FalseMBB = nullptr;
991  } else if (FalseMBB->succ_size() == 1
992  && *FalseMBB->succ_begin() == TrueMBB) {
993  // Triangle pattern, true is empty
994  // We reverse the predicate to make a triangle, empty false pattern;
995  std::swap(TrueMBB, FalseMBB);
996  reversePredicateSetter(MBB->end(), *MBB);
997  LandBlk = FalseMBB;
998  FalseMBB = nullptr;
999  } else if (FalseMBB->succ_size() == 1
1000  && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
1001  LandBlk = *FalseMBB->succ_begin();
1002  } else if (TrueMBB->succ_size() == 1
1003  && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
1004  LandBlk = *TrueMBB->succ_begin();
1005  } else {
1006  return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1007  }
1008 
1009  // improveSimpleJumpintoIf can handle the case where landBlk == NULL but the
1010  // new BB created for landBlk==NULL may introduce new challenge to the
1011  // reduction process.
1012  if (LandBlk &&
1013  ((TrueMBB && TrueMBB->pred_size() > 1)
1014  || (FalseMBB && FalseMBB->pred_size() > 1))) {
1015  Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1016  }
1017 
1018  if (TrueMBB && TrueMBB->pred_size() > 1) {
1019  TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1020  ++Cloned;
1021  }
1022 
1023  if (FalseMBB && FalseMBB->pred_size() > 1) {
1024  FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1025  ++Cloned;
1026  }
1027 
1028  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1029 
1030  ++numIfPatternMatch;
1031 
1032  numClonedBlock += Cloned;
1033 
1034  return 1 + Cloned + NumMatch;
1035 }
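// Shapes recognized above (added commentary):
//
//   Diamond:            Triangle (false side empty):
//       MBB                  MBB
//      /   \                /   \
//   True   False         True    |
//      \   /                \    |
//       Land                 Land (== FalseMBB)
//
// Anything else falls through to handleJumpintoIf(), which tries to clone
// side-entry blocks until one of these shapes appears.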
1036 
1037 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1038  std::deque<MachineLoop *> NestedLoops;
1039  for (auto &It: *MLI)
1040  for (MachineLoop *ML : depth_first(It))
1041  NestedLoops.push_front(ML);
1042 
1043  if (NestedLoops.empty())
1044  return 0;
1045 
1046  // Process nested loop outside->inside (we did push_front),
1047  // so "continue" to an outer loop won't be mistaken as "break"
1048  // of the current loop.
1049  int Num = 0;
1050  for (MachineLoop *ExaminedLoop : NestedLoops) {
1051  if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1052  continue;
1053  DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1054  int NumBreak = mergeLoop(ExaminedLoop);
1055  if (NumBreak == -1)
1056  break;
1057  Num += NumBreak;
1058  }
1059  return Num;
1060 }
1061 
1062 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1063  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1064  MBBVector ExitingMBBs;
1065  LoopRep->getExitingBlocks(ExitingMBBs);
1066  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1067  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
1068  // We assume a single ExitBlk
1069  MBBVector ExitBlks;
1070  LoopRep->getExitBlocks(ExitBlks);
1071  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1072  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1073  ExitBlkSet.insert(ExitBlks[i]);
1074  assert(ExitBlkSet.size() == 1);
1075  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1076  assert(ExitBlk && "Loop has several exit block");
1077  MBBVector LatchBlks;
1078  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1079  if (LoopRep->contains(LB))
1080  LatchBlks.push_back(LB);
1081 
1082  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1083  mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1084  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1085  settleLoopcontBlock(LatchBlks[i], LoopHeader);
1086  int Match = 0;
1087  do {
1088  Match = 0;
1089  Match += serialPatternMatch(LoopHeader);
1090  Match += ifPatternMatch(LoopHeader);
1091  } while (Match > 0);
1092  mergeLooplandBlock(LoopHeader, ExitBlk);
1093  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1094  if (ParentLoop)
1095  MLI->changeLoopFor(LoopHeader, ParentLoop);
1096  else
1097  MLI->removeBlock(LoopHeader);
1098  Visited[LoopRep] = true;
1099  return 1;
1100 }
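// Added summary of mergeLoop(): every exiting block gets an
// IF_PREDICATE_SET / BREAK / ENDIF sequence in place of its conditional branch
// (mergeLoopbreakBlock), every latch gets a CONTINUE (settleLoopcontBlock), the
// remaining body is reduced with the serial/if patterns, and finally the header
// is wrapped in WHILELOOP ... ENDLOOP (mergeLooplandBlock).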
1101 
1102 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1103  MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1104  if (Src1MBB->succ_size() == 0) {
1105  MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1106  if (LoopRep && LoopRep == MLI->getLoopFor(Src2MBB)) {
1107  MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1108  if (TheEntry) {
1109  DEBUG(
1110  dbgs() << "isLoopContBreakBlock yes src1 = BB"
1111  << Src1MBB->getNumber()
1112  << " src2 = BB" << Src2MBB->getNumber() << "\n";
1113  );
1114  return true;
1115  }
1116  }
1117  }
1118  return false;
1119 }
1120 
1121 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1122  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1123  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1124  if (Num == 0) {
1125  DEBUG(
1126  dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1127  );
1128  Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1129  }
1130  return Num;
1131 }
1132 
1133 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1134  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1135  int Num = 0;
1136  MachineBasicBlock *DownBlk;
1137 
1138  //trueBlk could be the common post dominator
1139  DownBlk = TrueMBB;
1140 
1141  DEBUG(
1142  dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1143  << " true = BB" << TrueMBB->getNumber()
1144  << ", numSucc=" << TrueMBB->succ_size()
1145  << " false = BB" << FalseMBB->getNumber() << "\n";
1146  );
1147 
1148  while (DownBlk) {
1149  DEBUG(
1150  dbgs() << "check down = BB" << DownBlk->getNumber();
1151  );
1152 
1153  if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1154  DEBUG(
1155  dbgs() << " working\n";
1156  );
1157 
1158  Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1159  Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1160 
1161  numClonedBlock += Num;
1162  Num += serialPatternMatch(*HeadMBB->succ_begin());
1163  Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1164  Num += ifPatternMatch(HeadMBB);
1165  assert(Num > 0);
1166 
1167  break;
1168  }
1169  DEBUG(
1170  dbgs() << " not working\n";
1171  );
1172  DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1173  } // walk down the postDomTree
1174 
1175  return Num;
1176 }
1177 
1178 #ifndef NDEBUG
1179 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1180  MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1181  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1182  dbgs() << "head = BB" << HeadMBB->getNumber()
1183  << " size = " << HeadMBB->size();
1184  if (Detail) {
1185  dbgs() << "\n";
1186  HeadMBB->print(dbgs());
1187  dbgs() << "\n";
1188  }
1189 
1190  if (TrueMBB) {
1191  dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1192  << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1193  if (Detail) {
1194  dbgs() << "\n";
1195  TrueMBB->print(dbgs());
1196  dbgs() << "\n";
1197  }
1198  }
1199  if (FalseMBB) {
1200  dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1201  << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1202  if (Detail) {
1203  dbgs() << "\n";
1204  FalseMBB->print(dbgs());
1205  dbgs() << "\n";
1206  }
1207  }
1208  if (LandMBB) {
1209  dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1210  << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1211  if (Detail) {
1212  dbgs() << "\n";
1213  LandMBB->print(dbgs());
1214  dbgs() << "\n";
1215  }
1216  }
1217 
1218  dbgs() << "\n";
1219 }
1220 #endif
1221 
1222 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1223  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1224  MachineBasicBlock **LandMBBPtr) {
1225  bool MigrateTrue = false;
1226  bool MigrateFalse = false;
1227 
1228  MachineBasicBlock *LandBlk = *LandMBBPtr;
1229 
1230  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1231  && (!FalseMBB || FalseMBB->succ_size() <= 1));
1232 
1233  if (TrueMBB == FalseMBB)
1234  return 0;
1235 
1236  MigrateTrue = needMigrateBlock(TrueMBB);
1237  MigrateFalse = needMigrateBlock(FalseMBB);
1238 
1239  if (!MigrateTrue && !MigrateFalse)
1240  return 0;
1241 
1242  // If we need to migrate either trueBlk or falseBlk, also migrate the other
1243  // one if it has more than one predecessor. Without doing this, its
1244  // predecessor rather than headBlk will have an undefined value in initReg.
1245  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1246  MigrateTrue = true;
1247  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1248  MigrateFalse = true;
1249 
1250  DEBUG(
1251  dbgs() << "before improveSimpleJumpintoIf: ";
1252  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1253  );
1254 
1255  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1256  //
1257  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1258  // {initReg = 0; org falseBlk branch }
1259  // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1260  // => org landBlk
1261  // if landBlk->pred_size() > 2, put the above if-else inside
1262  // if (initReg !=2) {...}
1263  //
1264  // add initReg = initVal to headBlk
1265 
1266  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1267  if (!MigrateTrue || !MigrateFalse) {
1268  // XXX: We have an opportunity here to optimize the "branch into if" case
1269  // here. Branch into if looks like this:
1270  // entry
1271  // / |
1272  // diamond_head branch_from
1273  // / \ |
1274  // diamond_false diamond_true
1275  // \ /
1276  // done
1277  //
1278  // The diamond_head block begins the "if" and the diamond_true block
1279  // is the block being "branched into".
1280  //
1281  // If MigrateTrue is true, then TrueBB is the block being "branched into"
1282  // and if MigrateFalse is true, then FalseBB is the block being
1283  // "branched into"
1284  //
1285  // Here is the pseudo code for how I think the optimization should work:
1286  // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1287  // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1288  // 3. Move the branch instruction from diamond_head into its own basic
1289  // block (new_block).
1290  // 4. Add an unconditional branch from diamond_head to new_block
1291  // 5. Replace the branch instruction in branch_from with an unconditional
1292  // branch to new_block. If branch_from has multiple predecessors, then
1293  // we need to replace the True/False block in the branch
1294  // instruction instead of replacing it.
1295  // 6. Change the condition of the branch instruction in new_block from
1296  // COND to (COND || GPR0)
1297  //
1298  // In order to insert these MOV instructions, we will need to use the
1299  // RegisterScavenger. Usually liveness stops being tracked during
1300  // the late machine optimization passes, however if we implement
1301  // bool TargetRegisterInfo::requiresRegisterScavenging(
1302  // const MachineFunction &MF)
1303  // and have it return true, liveness will be tracked correctly
1304  // by generic optimization passes. We will also need to make sure that
1305  // all of our target-specific passes that run after regalloc and before
1306  // the CFGStructurizer track liveness and we will need to modify this pass
1307  // to correctly track liveness.
1308  //
1309  // After the above changes, the new CFG should look like this:
1310  // entry
1311  // / |
1312  // diamond_head branch_from
1313  // \ /
1314  // new_block
1315  // / |
1316  // diamond_false diamond_true
1317  // \ /
1318  // done
1319  //
1320  // Without this optimization, we are forced to duplicate the diamond_true
1321  // block and we will end up with a CFG like this:
1322  //
1323  // entry
1324  // / |
1325  // diamond_head branch_from
1326  // / \ |
1327  // diamond_false diamond_true diamond_true (duplicate)
1328  // \ / |
1329  // done --------------------|
1330  //
1331  // Duplicating diamond_true can be very costly especially if it has a
1332  // lot of instructions.
1333  return 0;
1334  }
1335 
1336  int NumNewBlk = 0;
1337 
1338  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1339 
1340  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1341  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
1342 
1343  if (LandBlkHasOtherPred) {
1344  report_fatal_error("Extra register needed to handle CFG");
1345  unsigned CmpResReg =
1346  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1347  report_fatal_error("Extra compare instruction needed to handle CFG");
1348  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1349  CmpResReg, DebugLoc());
1350  }
1351 
1352  // XXX: We are running this after RA, so creating virtual registers will
1353  // cause an assertion failure in the PostRA scheduling pass.
1354  unsigned InitReg =
1355  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1356  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1357  DebugLoc());
1358 
1359  if (MigrateTrue) {
1360  migrateInstruction(TrueMBB, LandBlk, I);
1361  // need to unconditionally insert the assignment to ensure a path from its
1362  // predecessor rather than headBlk has valid value in initReg if
1363  // (initVal != 1).
1364  report_fatal_error("Extra register needed to handle CFG");
1365  }
1366  insertInstrBefore(I, AMDGPU::ELSE);
1367 
1368  if (MigrateFalse) {
1369  migrateInstruction(FalseMBB, LandBlk, I);
1370  // need to unconditionally insert the assignment to ensure a path from its
1371  // predecessor rather than headBlk has valid value in initReg if
1372  // (initVal != 0)
1373  report_fatal_error("Extra register needed to handle CFG");
1374  }
1375 
1376  if (LandBlkHasOtherPred) {
1377  // add endif
1378  insertInstrBefore(I, AMDGPU::ENDIF);
1379 
1380  // put initReg = 2 to other predecessors of landBlk
1381  for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1382  PE = LandBlk->pred_end(); PI != PE; ++PI) {
1383  MachineBasicBlock *MBB = *PI;
1384  if (MBB != TrueMBB && MBB != FalseMBB)
1385  report_fatal_error("Extra register needed to handle CFG");
1386  }
1387  }
1388  DEBUG(
1389  dbgs() << "result from improveSimpleJumpintoIf: ";
1390  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1391  );
1392 
1393  // update landBlk
1394  *LandMBBPtr = LandBlk;
1395 
1396  return NumNewBlk;
1397 }
1398 
1399 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1400  MachineBasicBlock *SrcMBB) {
1401  DEBUG(
1402  dbgs() << "serialPattern BB" << DstMBB->getNumber()
1403  << " <= BB" << SrcMBB->getNumber() << "\n";
1404  );
1405  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1406 
1407  DstMBB->removeSuccessor(SrcMBB, true);
1408  cloneSuccessorList(DstMBB, SrcMBB);
1409 
1410  removeSuccessor(SrcMBB);
1411  MLI->removeBlock(SrcMBB);
1412  retireBlock(SrcMBB);
1413 }
1414 
1415 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1416  MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1417  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1418  assert (TrueMBB);
1419  DEBUG(
1420  dbgs() << "ifPattern BB" << MBB->getNumber();
1421  dbgs() << "{ ";
1422  if (TrueMBB) {
1423  dbgs() << "BB" << TrueMBB->getNumber();
1424  }
1425  dbgs() << " } else ";
1426  dbgs() << "{ ";
1427  if (FalseMBB) {
1428  dbgs() << "BB" << FalseMBB->getNumber();
1429  }
1430  dbgs() << " }\n ";
1431  dbgs() << "landBlock: ";
1432  if (!LandMBB) {
1433  dbgs() << "NULL";
1434  } else {
1435  dbgs() << "BB" << LandMBB->getNumber();
1436  }
1437  dbgs() << "\n";
1438  );
1439 
1440  int OldOpcode = BranchMI->getOpcode();
1441  DebugLoc BranchDL = BranchMI->getDebugLoc();
1442 
1443 // transform to
1444 // if cond
1445 // trueBlk
1446 // else
1447 // falseBlk
1448 // endif
1449 // landBlk
1450 
1451  MachineBasicBlock::iterator I = BranchMI;
1452  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1453  BranchDL);
1454 
1455  if (TrueMBB) {
1456  MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1457  MBB->removeSuccessor(TrueMBB, true);
1458  if (LandMBB && TrueMBB->succ_size()!=0)
1459  TrueMBB->removeSuccessor(LandMBB, true);
1460  retireBlock(TrueMBB);
1461  MLI->removeBlock(TrueMBB);
1462  }
1463 
1464  if (FalseMBB) {
1465  insertInstrBefore(I, AMDGPU::ELSE);
1466  MBB->splice(I, FalseMBB, FalseMBB->begin(),
1467  FalseMBB->end());
1468  MBB->removeSuccessor(FalseMBB, true);
1469  if (LandMBB && FalseMBB->succ_size() != 0)
1470  FalseMBB->removeSuccessor(LandMBB, true);
1471  retireBlock(FalseMBB);
1472  MLI->removeBlock(FalseMBB);
1473  }
1474  insertInstrBefore(I, AMDGPU::ENDIF);
1475 
1476  BranchMI->eraseFromParent();
1477 
1478  if (LandMBB && TrueMBB && FalseMBB)
1479  MBB->addSuccessor(LandMBB);
1480 }
1481 
1482 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1483  MachineBasicBlock *LandMBB) {
1484  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1485  << " land = BB" << LandMBB->getNumber() << "\n";);
1486 
1487  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
1488  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
1489  DstBlk->replaceSuccessor(DstBlk, LandMBB);
1490 }
1491 
1492 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1493  MachineBasicBlock *LandMBB) {
1494  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
1495  << " land = BB" << LandMBB->getNumber() << "\n";);
1496  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1497  assert(BranchMI && isCondBranch(BranchMI));
1498  DebugLoc DL = BranchMI->getDebugLoc();
1499  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1500  MachineBasicBlock::iterator I = BranchMI;
1501  if (TrueBranch != LandMBB)
1502  reversePredicateSetter(I, *I->getParent());
1503  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1504  insertInstrBefore(I, AMDGPU::BREAK);
1505  insertInstrBefore(I, AMDGPU::ENDIF);
1506  //now branchInst can be erased safely
1507  BranchMI->eraseFromParent();
1508  //now take care of successors, retire blocks
1509  ExitingMBB->removeSuccessor(LandMBB, true);
1510 }
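// Before/after sketch for the transformation above (added commentary):
//
//   ExitingMBB:                        ExitingMBB:
//     ...                                ...
//     JUMP_COND pred, LandMBB    ==>     IF_PREDICATE_SET PREDICATE_BIT
//                                        BREAK
//                                        ENDIF
//
// If the conditional branch targeted the block that stays inside the loop
// rather than LandMBB, the predicate is inverted first by
// reversePredicateSetter().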
1511 
1512 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1513  MachineBasicBlock *ContMBB) {
1514  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1515  << ContingMBB->getNumber()
1516  << ", cont = BB" << ContMBB->getNumber() << "\n";);
1517 
1518  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1519  if (MI) {
1520  assert(isCondBranch(MI));
1521  MachineBasicBlock::iterator I = MI;
1522  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1523  int OldOpcode = MI->getOpcode();
1524  DebugLoc DL = MI->getDebugLoc();
1525 
1526  bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1527 
1528  if (!UseContinueLogical) {
1529  int BranchOpcode =
1530  TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1531  getBranchZeroOpcode(OldOpcode);
1532  insertCondBranchBefore(I, BranchOpcode, DL);
1533  // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1534  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1535  insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1536  } else {
1537  int BranchOpcode =
1538  TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1539  getContinueZeroOpcode(OldOpcode);
1540  insertCondBranchBefore(I, BranchOpcode, DL);
1541  }
1542 
1543  MI->eraseFromParent();
1544  } else {
1545  // If we've arrived here then we've already erased the branch instruction.
1546  // Travel back up the basic block to find the last reference of our debug
1547  // location; we've just inserted that reference here, so it should be
1548  // representative. Use insertEnd to ensure phi-moves, if they exist, go
1549  // before the continue-instr.
1550  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1551  getLastDebugLocInBB(ContingMBB));
1552  }
1553 }
1554 
1555 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1556  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1557  int Cloned = 0;
1558  assert(PreMBB->isSuccessor(SrcMBB));
1559  while (SrcMBB && SrcMBB != DstMBB) {
1560  assert(SrcMBB->succ_size() == 1);
1561  if (SrcMBB->pred_size() > 1) {
1562  SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1563  ++Cloned;
1564  }
1565 
1566  PreMBB = SrcMBB;
1567  SrcMBB = *SrcMBB->succ_begin();
1568  }
1569 
1570  return Cloned;
1571 }
1572 
1573 MachineBasicBlock *
1574 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1575  MachineBasicBlock *PredMBB) {
1576  assert(PredMBB->isSuccessor(MBB) &&
1577  "succBlk is not a predecessor of curBlk");
1578 
1579  MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
1580  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1581  //srcBlk, oldBlk, newBlk
1582 
1583  PredMBB->replaceSuccessor(MBB, CloneMBB);
1584 
1585  // add all successor to cloneBlk
1586  cloneSuccessorList(CloneMBB, MBB);
1587 
1588  numClonedInstr += MBB->size();
1589 
1590  DEBUG(
1591  dbgs() << "Cloned block: " << "BB"
1592  << MBB->getNumber() << "size " << MBB->size() << "\n";
1593  );
1594 
1595  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1596 
1597  return CloneMBB;
1598 }
1599 
1600 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1601  MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
1602  MachineBasicBlock::iterator SpliceEnd;
1603  //look for the input branchinstr, not the AMDGPU branchinstr
1604  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1605  if (!BranchMI) {
1606  DEBUG(
1607  dbgs() << "migrateInstruction don't see branch instr\n";
1608  );
1609  SpliceEnd = SrcMBB->end();
1610  } else {
1611  DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1612  SpliceEnd = BranchMI;
1613  }
1614  DEBUG(
1615  dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
1616  << "srcSize = " << SrcMBB->size() << "\n";
1617  );
1618 
1619  //splice insert before insertPos
1620  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1621 
1622  DEBUG(
1623  dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
1624  << "srcSize = " << SrcMBB->size() << '\n';
1625  );
1626 }
1627 
1628 MachineBasicBlock *
1629 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1630  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1631  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1632 
1633  if (!LoopHeader || !LoopLatch)
1634  return nullptr;
1635  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1636  // Is LoopRep an infinite loop ?
1637  if (!BranchMI || !isUncondBranch(BranchMI))
1638  return nullptr;
1639 
1640  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1641  FuncRep->push_back(DummyExitBlk); //insert to function
1642  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1643  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1644  LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
1645  Ctx.emitError("Extra register needed to handle CFG");
1646  return nullptr;
1647 }
1648 
1649 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1650  MachineInstr *BranchMI;
1651 
1652  // I saw two unconditional branches in one basic block in example
1653  // test_fc_do_while_or.c; need to fix the upstream on this to remove the loop.
1654  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1655  && isUncondBranch(BranchMI)) {
1656  DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1657  BranchMI->eraseFromParent();
1658  }
1659 }
1660 
1661 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1662  MachineBasicBlock *MBB) {
1663  if (MBB->succ_size() != 2)
1664  return;
1665  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1666  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1667  if (MBB1 != MBB2)
1668  return;
1669 
1670  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1671  assert(BranchMI && isCondBranch(BranchMI));
1672  DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1673  BranchMI->eraseFromParent();
1674  SHOWNEWBLK(MBB1, "Removing redundant successor");
1675  MBB->removeSuccessor(MBB1, true);
1676 }
1677 
1678 void AMDGPUCFGStructurizer::addDummyExitBlock(
1679  SmallVectorImpl<MachineBasicBlock *> &RetMBB) {
1680  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1681  FuncRep->push_back(DummyExitBlk); //insert to function
1682  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
1683 
1684  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1685  E = RetMBB.end(); It != E; ++It) {
1686  MachineBasicBlock *MBB = *It;
1687  MachineInstr *MI = getReturnInstr(MBB);
1688  if (MI)
1689  MI->eraseFromParent();
1690  MBB->addSuccessor(DummyExitBlk);
1691  DEBUG(
1692  dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1693  << " successors\n";
1694  );
1695  }
1696  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1697 }
1698 
1699 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1700  while (MBB->succ_size())
1701  MBB->removeSuccessor(*MBB->succ_begin());
1702 }
1703 
1704 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1705  int SccNum) {
1706  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1707  if (!srcBlkInfo)
1708  srcBlkInfo = new BlockInformation();
1709  srcBlkInfo->SccNum = SccNum;
1710 }
1711 
1712 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1713  DEBUG(
1714  dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
1715  );
1716 
1717  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1718 
1719  if (!SrcBlkInfo)
1720  SrcBlkInfo = new BlockInformation();
1721 
1722  SrcBlkInfo->IsRetired = true;
1723  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1724  && "can't retire block yet");
1725 }
1726 
1727 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1728  "AMDGPU CFG Structurizer", false, false)
1729 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1730 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
1731 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1732 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1733                     "AMDGPU CFG Structurizer", false, false)
1734 
1735 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
1736  return new AMDGPUCFGStructurizer();
1737 }
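INITIALIZE_PASS_BEGIN/END register the pass with the global PassRegistry under the "amdgpustructurizer" name, and createAMDGPUCFGStructurizerPass() is the factory the target wires into its codegen pipeline. A hedged sketch of the ownership pattern around the factory: in-tree the pass is added from the R600 target's TargetPassConfig, and a MachineFunctionPass only does useful work inside the full codegen pipeline, so this snippet merely shows how the returned pointer is handed off to a legacy pass manager, which takes ownership.

  #include "AMDGPU.h"                    // declares createAMDGPUCFGStructurizerPass()
  #include "llvm/IR/LegacyPassManager.h"

  // Hand the heap-allocated pass to a legacy pass manager, which then owns it.
  static void addStructurizer(llvm::legacy::PassManagerBase &PM) {
    PM.add(llvm::createAMDGPUCFGStructurizerPass());
  }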