// AMDILCFGStructurizer.cpp — source listing from LLVM 7.0.0svn (extracted from
// generated documentation; some original lines are absent from this capture).
1 //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDGPUSubtarget.h"
12 #include "R600InstrInfo.h"
13 #include "R600RegisterInfo.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/StringRef.h"
32 #include "llvm/IR/DebugLoc.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/Debug.h"
39 #include <cassert>
40 #include <cstddef>
41 #include <deque>
42 #include <iterator>
43 #include <map>
44 #include <utility>
45 #include <vector>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "structcfg"
50 
51 #define DEFAULT_VEC_SLOTS 8
52 
53 // TODO: move-begin.
54 
55 //===----------------------------------------------------------------------===//
56 //
57 // Statistics for CFGStructurizer.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
62  "matched");
63 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
64  "matched");
65 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
66 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
67 
68 namespace llvm {
69 
71 
72 } // end namespace llvm
73 
74 namespace {
75 
76 //===----------------------------------------------------------------------===//
77 //
78 // Miscellaneous utility for CFGStructurizer.
79 //
80 //===----------------------------------------------------------------------===//
81 
82 #define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
83 
84 #define SHOWNEWBLK(b, msg) \
85  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
86  dbgs() << "\n";);
87 
88 #define SHOWBLK_DETAIL(b, msg) \
89  LLVM_DEBUG(if (b) { \
90  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
91  b->print(dbgs()); \
92  dbgs() << "\n"; \
93  });
94 
95 #define INVALIDSCCNUM -1
96 
97 //===----------------------------------------------------------------------===//
98 //
99 // supporting data structure for CFGStructurizer
100 //
101 //===----------------------------------------------------------------------===//
102 
/// Per-basic-block bookkeeping attached to blocks via the structurizer's
/// BlockInfoMap.
class BlockInformation {
public:
  bool IsRetired = false;      // Block was merged away; erased at end of run().
  int SccNum = INVALIDSCCNUM;  // SCC index assigned during orderBlocks().

  BlockInformation() = default;
};
110 
111 //===----------------------------------------------------------------------===//
112 //
113 // CFGStructurizer
114 //
115 //===----------------------------------------------------------------------===//
116 
117 class AMDGPUCFGStructurizer : public MachineFunctionPass {
118 public:
120  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
121  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
122 
123  enum PathToKind {
124  Not_SinglePath = 0,
125  SinglePath_InPath = 1,
126  SinglePath_NotInPath = 2
127  };
128 
129  static char ID;
130 
131  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
133  }
134 
  /// Human-readable pass name reported by the pass manager.
  StringRef getPassName() const override {
    return "AMDGPU Control Flow Graph structurizer Pass";
  }
138 
139  void getAnalysisUsage(AnalysisUsage &AU) const override {
144  }
145 
146  /// Perform the CFG structurization
147  bool run();
148 
149  /// Perform the CFG preparation
150  /// This step will remove every unconditionnal/dead jump instructions and make
151  /// sure all loops have an exit block
152  bool prepare();
153 
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Cache the R600 target hooks for this function.
    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
    TRI = &TII->getRegisterInfo();
    LLVM_DEBUG(MF.dump(););
    // Reset per-function state left over from a previous function.
    OrderedBlks.clear();
    Visited.clear();
    FuncRep = &MF;
    // Fetch the required analyses: loop info and both dominator trees.
    MLI = &getAnalysis<MachineLoopInfo>();
    LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
    MDT = &getAnalysis<MachineDominatorTree>();
    LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
    PDT = &getAnalysis<MachinePostDominatorTree>();
    LLVM_DEBUG(PDT->print(dbgs()););
    // prepare() normalizes the CFG (strips unconditional/redundant branches,
    // gives every loop an exit block); run() does the structurization proper.
    prepare();
    run();
    LLVM_DEBUG(MF.dump(););
    // Conservatively report the function as modified.
    return true;
  }
172 
173 protected:
176  MachineLoopInfo *MLI;
177  const R600InstrInfo *TII = nullptr;
178  const R600RegisterInfo *TRI = nullptr;
179 
180  // PRINT FUNCTIONS
181  /// Print the ordered Blocks.
182  void printOrderedBlocks() const {
183  size_t i = 0;
184  for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
185  iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
186  dbgs() << "BB" << (*iterBlk)->getNumber();
187  dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
188  if (i != 0 && i % 10 == 0) {
189  dbgs() << "\n";
190  } else {
191  dbgs() << " ";
192  }
193  }
194  }
195 
196  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
197  for (MachineLoop::iterator iter = LoopInfo.begin(),
198  iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
199  (*iter)->print(dbgs(), 0);
200  }
201  }
202 
203  // UTILITY FUNCTIONS
204  int getSCCNum(MachineBasicBlock *MBB) const;
205  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
206  bool hasBackEdge(MachineBasicBlock *MBB) const;
207  bool isRetiredBlock(MachineBasicBlock *MBB) const;
208  bool isActiveLoophead(MachineBasicBlock *MBB) const;
209  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
210  bool AllowSideEntry = true) const;
211  int countActiveBlock(MBBVector::const_iterator It,
213  bool needMigrateBlock(MachineBasicBlock *MBB) const;
214 
215  // Utility Functions
216  void reversePredicateSetter(MachineBasicBlock::iterator I,
217  MachineBasicBlock &MBB);
218  /// Compute the reversed DFS post order of Blocks
219  void orderBlocks(MachineFunction *MF);
220 
221  // Function originally from CFGStructTraits
222  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
223  const DebugLoc &DL = DebugLoc());
224  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
225  const DebugLoc &DL = DebugLoc());
226  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
227  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
228  const DebugLoc &DL);
229  void insertCondBranchBefore(MachineBasicBlock *MBB,
230  MachineBasicBlock::iterator I, int NewOpcode,
231  int RegNum, const DebugLoc &DL);
232 
233  static int getBranchNzeroOpcode(int OldOpcode);
234  static int getBranchZeroOpcode(int OldOpcode);
235  static int getContinueNzeroOpcode(int OldOpcode);
236  static int getContinueZeroOpcode(int OldOpcode);
237  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
238  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
239  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
240  MachineInstr *MI);
241  static bool isCondBranch(MachineInstr *MI);
242  static bool isUncondBranch(MachineInstr *MI);
243  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
244  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
245 
246  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
247  ///
248  /// BB with backward-edge could have move instructions after the branch
249  /// instruction. Such move instruction "belong to" the loop backward-edge.
250  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
251 
252  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
253  static bool isReturnBlock(MachineBasicBlock *MBB);
254  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
255  MachineBasicBlock *SrcMBB);
256  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
257 
258  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
259  /// because the AMDGPU instruction is not recognized as terminator fix this
260  /// and retire this routine
261  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
262  MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
263 
264  static void wrapup(MachineBasicBlock *MBB);
265 
266  int patternMatch(MachineBasicBlock *MBB);
267  int patternMatchGroup(MachineBasicBlock *MBB);
268  int serialPatternMatch(MachineBasicBlock *MBB);
269  int ifPatternMatch(MachineBasicBlock *MBB);
270  int loopendPatternMatch();
271  int mergeLoop(MachineLoop *LoopRep);
272 
273  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
274  /// the same loop with LoopLandInfo without explicitly keeping track of
275  /// loopContBlks and loopBreakBlks, this is a method to get the information.
276  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
277  MachineBasicBlock *Src2MBB);
278  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
279  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
280  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
281  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
282  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
283  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
284  MachineBasicBlock **LandMBBPtr);
285  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
286  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
287  MachineBasicBlock *LandMBB, bool Detail = false);
288  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
289  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
290  void mergeSerialBlock(MachineBasicBlock *DstMBB,
291  MachineBasicBlock *SrcMBB);
292 
293  void mergeIfthenelseBlock(MachineInstr *BranchMI,
294  MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
295  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
296  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
297  MachineBasicBlock *LandMBB);
298  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
299  MachineBasicBlock *LandMBB);
300  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
301  MachineBasicBlock *ContMBB);
302 
303  /// normalizeInfiniteLoopExit change
304  /// B1:
305  /// uncond_br LoopHeader
306  ///
307  /// to
308  /// B1:
309  /// cond_br 1 LoopHeader dummyExit
310  /// and return the newly added dummy exit block
311  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
312  void removeUnconditionalBranch(MachineBasicBlock *MBB);
313 
314  /// Remove duplicate branches instructions in a block.
315  /// For instance
316  /// B0:
317  /// cond_br X B1 B2
318  /// cond_br X B1 B2
319  /// is transformed to
320  /// B0:
321  /// cond_br X B1 B2
322  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
323 
324  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
325  void removeSuccessor(MachineBasicBlock *MBB);
326  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
327  MachineBasicBlock *PredMBB);
328  void migrateInstruction(MachineBasicBlock *SrcMBB,
330  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
331  void retireBlock(MachineBasicBlock *MBB);
332 
333 private:
334  MBBInfoMap BlockInfoMap;
335  LoopLandInfoMap LLInfoMap;
336  std::map<MachineLoop *, bool> Visited;
337  MachineFunction *FuncRep;
339 };
340 
341 } // end anonymous namespace
342 
344 
346  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
347  if (It == BlockInfoMap.end())
348  return INVALIDSCCNUM;
349  return (*It).second->SccNum;
350 }
351 
352 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
353  const {
354  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
355  if (It == LLInfoMap.end())
356  return nullptr;
357  return (*It).second;
358 }
359 
360 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
361  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
362  if (!LoopRep)
363  return false;
364  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
365  return MBB->isSuccessor(LoopHeader);
366 }
367 
368 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
369  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
370  if (It == BlockInfoMap.end())
371  return false;
372  return (*It).second->IsRetired;
373 }
374 
375 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
376  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
377  while (LoopRep && LoopRep->getHeader() == MBB) {
378  MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
379  if(!LoopLand)
380  return true;
381  if (!isRetiredBlock(LoopLand))
382  return true;
383  LoopRep = LoopRep->getParentLoop();
384  }
385  return false;
386 }
387 
388 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
389  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
390  bool AllowSideEntry) const {
391  assert(DstMBB);
392  if (SrcMBB == DstMBB)
393  return SinglePath_InPath;
394  while (SrcMBB && SrcMBB->succ_size() == 1) {
395  SrcMBB = *SrcMBB->succ_begin();
396  if (SrcMBB == DstMBB)
397  return SinglePath_InPath;
398  if (!AllowSideEntry && SrcMBB->pred_size() > 1)
399  return Not_SinglePath;
400  }
401  if (SrcMBB && SrcMBB->succ_size()==0)
402  return SinglePath_NotInPath;
403  return Not_SinglePath;
404 }
405 
406 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
408  int Count = 0;
409  while (It != E) {
410  if (!isRetiredBlock(*It))
411  ++Count;
412  ++It;
413  }
414  return Count;
415 }
416 
417 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
418  unsigned BlockSizeThreshold = 30;
419  unsigned CloneInstrThreshold = 100;
420  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
421 
422  if(!MultiplePreds)
423  return false;
424  unsigned BlkSize = MBB->size();
425  return ((BlkSize > BlockSizeThreshold) &&
426  (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
427 }
428 
429 void AMDGPUCFGStructurizer::reversePredicateSetter(
431  assert(I.isValid() && "Expected valid iterator");
432  for (;; --I) {
433  if (I == MBB.end())
434  continue;
435  if (I->getOpcode() == AMDGPU::PRED_X) {
436  switch (I->getOperand(2).getImm()) {
437  case AMDGPU::PRED_SETE_INT:
438  I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
439  return;
440  case AMDGPU::PRED_SETNE_INT:
441  I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
442  return;
443  case AMDGPU::PRED_SETE:
444  I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
445  return;
446  case AMDGPU::PRED_SETNE:
447  I->getOperand(2).setImm(AMDGPU::PRED_SETE);
448  return;
449  default:
450  llvm_unreachable("PRED_X Opcode invalid!");
451  }
452  }
453  }
454 }
455 
456 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
457  int NewOpcode, const DebugLoc &DL) {
458  MachineInstr *MI =
459  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
460  MBB->push_back(MI);
461  //assume the instruction doesn't take any reg operand ...
462  SHOWNEWINSTR(MI);
463 }
464 
465 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
466  int NewOpcode,
467  const DebugLoc &DL) {
468  MachineInstr *MI =
469  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
470  if (MBB->begin() != MBB->end())
471  MBB->insert(MBB->begin(), MI);
472  else
473  MBB->push_back(MI);
474  SHOWNEWINSTR(MI);
475  return MI;
476 }
477 
478 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
479  MachineBasicBlock::iterator I, int NewOpcode) {
480  MachineInstr *OldMI = &(*I);
481  MachineBasicBlock *MBB = OldMI->getParent();
482  MachineInstr *NewMBB =
483  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
484  MBB->insert(I, NewMBB);
485  //assume the instruction doesn't take any reg operand ...
486  SHOWNEWINSTR(NewMBB);
487  return NewMBB;
488 }
489 
490 void AMDGPUCFGStructurizer::insertCondBranchBefore(
491  MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
492  MachineInstr *OldMI = &(*I);
493  MachineBasicBlock *MBB = OldMI->getParent();
494  MachineFunction *MF = MBB->getParent();
495  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
496  MBB->insert(I, NewMI);
497  MachineInstrBuilder MIB(*MF, NewMI);
498  MIB.addReg(OldMI->getOperand(1).getReg(), false);
499  SHOWNEWINSTR(NewMI);
500  //erase later oldInstr->eraseFromParent();
501 }
502 
503 void AMDGPUCFGStructurizer::insertCondBranchBefore(
505  int RegNum, const DebugLoc &DL) {
506  MachineFunction *MF = blk->getParent();
507  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
508  //insert before
509  blk->insert(I, NewInstr);
510  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
511  SHOWNEWINSTR(NewInstr);
512 }
513 
514 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
515  switch(OldOpcode) {
516  case AMDGPU::JUMP_COND:
517  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
518  case AMDGPU::BRANCH_COND_i32:
519  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
520  default: llvm_unreachable("internal error");
521  }
522  return -1;
523 }
524 
525 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
526  switch(OldOpcode) {
527  case AMDGPU::JUMP_COND:
528  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
529  case AMDGPU::BRANCH_COND_i32:
530  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
531  default: llvm_unreachable("internal error");
532  }
533  return -1;
534 }
535 
536 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
537  switch(OldOpcode) {
538  case AMDGPU::JUMP_COND:
539  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
540  default: llvm_unreachable("internal error");
541  }
542  return -1;
543 }
544 
545 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
546  switch(OldOpcode) {
547  case AMDGPU::JUMP_COND:
548  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
549  default: llvm_unreachable("internal error");
550  }
551  return -1;
552 }
553 
/// Return the taken ("true") destination of branch \p MI.
/// Operand 0 of the recognized branch opcodes is the target block.
MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
  return MI->getOperand(0).getMBB();
}
557 
/// Redirect the taken ("true") destination of branch \p MI to \p MBB.
void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
    MachineBasicBlock *MBB) {
  MI->getOperand(0).setMBB(MBB);
}
562 
564 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
565  MachineInstr *MI) {
566  assert(MBB->succ_size() == 2);
567  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
570  ++Next;
571  return (*It == TrueBranch) ? *Next : *It;
572 }
573 
574 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
575  switch (MI->getOpcode()) {
576  case AMDGPU::JUMP_COND:
577  case AMDGPU::BRANCH_COND_i32:
578  case AMDGPU::BRANCH_COND_f32: return true;
579  default:
580  return false;
581  }
582  return false;
583 }
584 
585 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
586  switch (MI->getOpcode()) {
587  case AMDGPU::JUMP:
588  case AMDGPU::BRANCH:
589  return true;
590  default:
591  return false;
592  }
593  return false;
594 }
595 
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
  // Return the DebugLoc of the LAST instruction in MBB that carries one
  // (the loop keeps overwriting DL), or an empty DebugLoc if none does.
  // The previous comment claimed "first", contradicting both the code and
  // the function name.
  DebugLoc DL;
  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
      ++It) {
    MachineInstr *instr = &(*It);
    if (instr->getDebugLoc())
      DL = instr->getDebugLoc();
  }
  return DL;
}
607 
608 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
609  MachineBasicBlock *MBB) {
611  MachineInstr *MI = &*It;
612  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
613  return MI;
614  return nullptr;
615 }
616 
617 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
618  MachineBasicBlock *MBB) {
619  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
620  It != E; ++It) {
621  // FIXME: Simplify
622  MachineInstr *MI = &*It;
623  if (MI) {
624  if (isCondBranch(MI) || isUncondBranch(MI))
625  return MI;
626  else if (!TII->isMov(MI->getOpcode()))
627  break;
628  }
629  }
630  return nullptr;
631 }
632 
633 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
635  if (It != MBB->rend()) {
636  MachineInstr *instr = &(*It);
637  if (instr->getOpcode() == AMDGPU::RETURN)
638  return instr;
639  }
640  return nullptr;
641 }
642 
/// A return block is one with no successors; it normally also ends in a
/// RETURN instruction.
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
  MachineInstr *MI = getReturnInstr(MBB);
  bool IsReturn = (MBB->succ_size() == 0);
  if (MI)
    // A RETURN may only appear in a successor-less block.
    assert(IsReturn);
  else if (IsReturn)
    // Successor-less block without RETURN: tolerated, but worth noting.
    LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
                      << " is return block without RETURN instr\n";);
  return IsReturn;
}
653 
654 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
655  MachineBasicBlock *SrcMBB) {
656  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
657  iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
658  DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
659 }
660 
661 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
662  MachineFunction *Func = MBB->getParent();
663  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
664  Func->push_back(NewMBB); //insert to function
665  for (const MachineInstr &It : *MBB)
666  NewMBB->push_back(Func->CloneMachineInstr(&It));
667  return NewMBB;
668 }
669 
670 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
671  MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
672  MachineBasicBlock *NewBlk) {
673  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
674  if (BranchMI && isCondBranch(BranchMI) &&
675  getTrueBranch(BranchMI) == OldMBB)
676  setTrueBranch(BranchMI, NewBlk);
677 }
678 
679 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
680  assert((!MBB->getParent()->getJumpTableInfo()
681  || MBB->getParent()->getJumpTableInfo()->isEmpty())
682  && "found a jump table");
683 
684  //collect continue right before endloop
686  MachineBasicBlock::iterator Pre = MBB->begin();
689  while (It != E) {
690  if (Pre->getOpcode() == AMDGPU::CONTINUE
691  && It->getOpcode() == AMDGPU::ENDLOOP)
692  ContInstr.push_back(&*Pre);
693  Pre = It;
694  ++It;
695  }
696 
697  //delete continue right before endloop
698  for (unsigned i = 0; i < ContInstr.size(); ++i)
699  ContInstr[i]->eraseFromParent();
700 
701  // TODO to fix up jump table so later phase won't be confused. if
702  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
703  // there isn't such an interface yet. alternatively, replace all the other
704  // blocks in the jump table with the entryBlk //}
705 }
706 
707 bool AMDGPUCFGStructurizer::prepare() {
708  bool Changed = false;
709 
710  //FIXME: if not reducible flow graph, make it so ???
711 
712  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
713 
714  orderBlocks(FuncRep);
715 
717 
718  // Add an ExitBlk to loop that don't have one
719  for (MachineLoopInfo::iterator It = MLI->begin(),
720  E = MLI->end(); It != E; ++It) {
721  MachineLoop *LoopRep = (*It);
722  MBBVector ExitingMBBs;
723  LoopRep->getExitingBlocks(ExitingMBBs);
724 
725  if (ExitingMBBs.size() == 0) {
726  MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
727  if (DummyExitBlk)
728  RetBlks.push_back(DummyExitBlk);
729  }
730  }
731 
732  // Remove unconditional branch instr.
733  // Add dummy exit block iff there are multiple returns.
735  It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
736  MachineBasicBlock *MBB = *It;
737  removeUnconditionalBranch(MBB);
738  removeRedundantConditionalBranch(MBB);
739  if (isReturnBlock(MBB)) {
740  RetBlks.push_back(MBB);
741  }
742  assert(MBB->succ_size() <= 2);
743  }
744 
745  if (RetBlks.size() >= 2) {
746  addDummyExitBlock(RetBlks);
747  Changed = true;
748  }
749 
750  return Changed;
751 }
752 
753 bool AMDGPUCFGStructurizer::run() {
754  //Assume reducible CFG...
755  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
756 
757 #ifdef STRESSTEST
758  //Use the worse block ordering to test the algorithm.
759  ReverseVector(orderedBlks);
760 #endif
761 
762  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
763  int NumIter = 0;
764  bool Finish = false;
765  MachineBasicBlock *MBB;
766  bool MakeProgress = false;
767  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
768  OrderedBlks.end());
769 
770  do {
771  ++NumIter;
772  LLVM_DEBUG(dbgs() << "numIter = " << NumIter
773  << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
774 
776  OrderedBlks.begin();
778  OrderedBlks.end();
779 
781  It;
782  MachineBasicBlock *SccBeginMBB = nullptr;
783  int SccNumBlk = 0; // The number of active blocks, init to a
784  // maximum possible number.
785  int SccNumIter; // Number of iteration in this SCC.
786 
787  while (It != E) {
788  MBB = *It;
789 
790  if (!SccBeginMBB) {
791  SccBeginIter = It;
792  SccBeginMBB = MBB;
793  SccNumIter = 0;
794  SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
795  LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
796  dbgs() << "\n";);
797  }
798 
799  if (!isRetiredBlock(MBB))
800  patternMatch(MBB);
801 
802  ++It;
803 
804  bool ContNextScc = true;
805  if (It == E
806  || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
807  // Just finish one scc.
808  ++SccNumIter;
809  int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
810  if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
811  LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
812  << ", sccNumIter = " << SccNumIter;
813  dbgs() << "doesn't make any progress\n";);
814  ContNextScc = true;
815  } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
816  SccNumBlk = sccRemainedNumBlk;
817  It = SccBeginIter;
818  ContNextScc = false;
819  LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
820  << "sccNumIter = " << SccNumIter << '\n';);
821  } else {
822  // Finish the current scc.
823  ContNextScc = true;
824  }
825  } else {
826  // Continue on next component in the current scc.
827  ContNextScc = false;
828  }
829 
830  if (ContNextScc)
831  SccBeginMBB = nullptr;
832  } //while, "one iteration" over the function.
833 
834  MachineBasicBlock *EntryMBB =
836  if (EntryMBB->succ_size() == 0) {
837  Finish = true;
838  LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
839  } else {
840  int NewnumRemainedBlk
841  = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
842  // consider cloned blocks ??
843  if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
844  MakeProgress = true;
845  NumRemainedBlk = NewnumRemainedBlk;
846  } else {
847  MakeProgress = false;
848  LLVM_DEBUG(dbgs() << "No progress\n";);
849  }
850  }
851  } while (!Finish && MakeProgress);
852 
853  // Misc wrap up to maintain the consistency of the Function representation.
855 
856  // Detach retired Block, release memory.
857  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
858  It != E; ++It) {
859  if ((*It).second && (*It).second->IsRetired) {
860  assert(((*It).first)->getNumber() != -1);
861  LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
862  (*It).first->eraseFromParent(); //Remove from the parent Function.
863  }
864  delete (*It).second;
865  }
866  BlockInfoMap.clear();
867  LLInfoMap.clear();
868 
869  if (!Finish) {
870  LLVM_DEBUG(FuncRep->viewCFG());
871  report_fatal_error("IRREDUCIBLE_CFG");
872  }
873 
874  return true;
875 }
876 
877 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
878  int SccNum = 0;
879  MachineBasicBlock *MBB;
880  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
881  ++It, ++SccNum) {
882  const std::vector<MachineBasicBlock *> &SccNext = *It;
883  for (std::vector<MachineBasicBlock *>::const_iterator
884  blockIter = SccNext.begin(), blockEnd = SccNext.end();
885  blockIter != blockEnd; ++blockIter) {
886  MBB = *blockIter;
887  OrderedBlks.push_back(MBB);
888  recordSccnum(MBB, SccNum);
889  }
890  }
891 
892  // walk through all the block in func to check for unreachable
893  for (auto *MBB : nodes(MF)) {
894  SccNum = getSCCNum(MBB);
895  if (SccNum == INVALIDSCCNUM)
896  dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
897  }
898 }
899 
900 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
901  int NumMatch = 0;
902  int CurMatch;
903 
904  LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
905 
906  while ((CurMatch = patternMatchGroup(MBB)) > 0)
907  NumMatch += CurMatch;
908 
909  LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
910  << ", numMatch = " << NumMatch << "\n";);
911 
912  return NumMatch;
913 }
914 
/// Try each structurization pattern once and return how many matched.
/// Kept as sequential statements on purpose: each call mutates the CFG, so
/// the order (loop-end first, then serial, then if) is significant and must
/// not be collapsed into a single expression.
int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  NumMatch += loopendPatternMatch();
  NumMatch += serialPatternMatch(MBB);
  NumMatch += ifPatternMatch(MBB);
  return NumMatch;
}
922 
923 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
924  if (MBB->succ_size() != 1)
925  return 0;
926 
927  MachineBasicBlock *childBlk = *MBB->succ_begin();
928  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
929  return 0;
930 
931  mergeSerialBlock(MBB, childBlk);
932  ++numSerialPatternMatch;
933  return 1;
934 }
935 
/// Match and merge an if/if-else region rooted at \p MBB.
/// Returns the number of patterns matched (including recursive matches in
/// the two arms and any blocks cloned along the way).
int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
  // The root of an if pattern must have exactly two out edges and must not
  // be a loop back edge source.
  if (MBB->succ_size() != 2)
    return 0;
  if (hasBackEdge(MBB))
    return 0;
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  if (!BranchMI)
    return 0;

  assert(isCondBranch(BranchMI));
  int NumMatch = 0;

  // First reduce both arms as much as possible so the shape tests below see
  // maximally-merged successors.
  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
  NumMatch += serialPatternMatch(TrueMBB);
  NumMatch += ifPatternMatch(TrueMBB);
  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
  NumMatch += serialPatternMatch(FalseMBB);
  NumMatch += ifPatternMatch(FalseMBB);
  MachineBasicBlock *LandBlk;
  int Cloned = 0;

  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
  // TODO: Simplify
  // Classify the region shape and pick the landing block.
  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
    // Diamond pattern
    LandBlk = *TrueMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
    // Triangle pattern, false is empty
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && *FalseMBB->succ_begin() == TrueMBB) {
    // Triangle pattern, true is empty
    // We reverse the predicate to make a triangle, empty false pattern;
    std::swap(TrueMBB, FalseMBB);
    reversePredicateSetter(MBB->end(), *MBB);
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
    // True arm is a detached continue/break of the enclosing loop.
    LandBlk = *FalseMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1
             && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
    // False arm is a detached continue/break of the enclosing loop.
    LandBlk = *TrueMBB->succ_begin();
  } else {
    // No recognizable shape: fall back to the jump-into-if transformation.
    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
  }

  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
  // new BB created for landBlk==NULL may introduce new challenge to the
  // reduction process.
  if (LandBlk &&
      ((TrueMBB && TrueMBB->pred_size() > 1)
      || (FalseMBB && FalseMBB->pred_size() > 1))) {
    Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
  }

  // Arms with side entries must be privatized for this predecessor by
  // cloning before the region can be merged.
  if (TrueMBB && TrueMBB->pred_size() > 1) {
    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
    ++Cloned;
  }

  if (FalseMBB && FalseMBB->pred_size() > 1) {
    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
    ++Cloned;
  }

  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);

  ++numIfPatternMatch;

  numClonedBlock += Cloned;

  return 1 + Cloned + NumMatch;
}
1013 
1014 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1015  std::deque<MachineLoop *> NestedLoops;
1016  for (auto &It: *MLI)
1017  for (MachineLoop *ML : depth_first(It))
1018  NestedLoops.push_front(ML);
1019 
1020  if (NestedLoops.empty())
1021  return 0;
1022 
1023  // Process nested loop outside->inside (we did push_front),
1024  // so "continue" to a outside loop won't be mistaken as "break"
1025  // of the current loop.
1026  int Num = 0;
1027  for (MachineLoop *ExaminedLoop : NestedLoops) {
1028  if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1029  continue;
1030  LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1031  int NumBreak = mergeLoop(ExaminedLoop);
1032  if (NumBreak == -1)
1033  break;
1034  Num += NumBreak;
1035  }
1036  return Num;
1037 }
1038 
1039 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1040  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1041  MBBVector ExitingMBBs;
1042  LoopRep->getExitingBlocks(ExitingMBBs);
1043  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1044  LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
1045  << " exiting blocks\n";);
1046  // We assume a single ExitBlk
1047  MBBVector ExitBlks;
1048  LoopRep->getExitBlocks(ExitBlks);
1050  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1051  ExitBlkSet.insert(ExitBlks[i]);
1052  assert(ExitBlkSet.size() == 1);
1053  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1054  assert(ExitBlk && "Loop has several exit block");
1055  MBBVector LatchBlks;
1056  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1057  if (LoopRep->contains(LB))
1058  LatchBlks.push_back(LB);
1059 
1060  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1061  mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1062  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1063  settleLoopcontBlock(LatchBlks[i], LoopHeader);
1064  int Match = 0;
1065  do {
1066  Match = 0;
1067  Match += serialPatternMatch(LoopHeader);
1068  Match += ifPatternMatch(LoopHeader);
1069  } while (Match > 0);
1070  mergeLooplandBlock(LoopHeader, ExitBlk);
1071  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1072  if (ParentLoop)
1073  MLI->changeLoopFor(LoopHeader, ParentLoop);
1074  else
1075  MLI->removeBlock(LoopHeader);
1076  Visited[LoopRep] = true;
1077  return 1;
1078 }
1079 
1080 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1081  MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1082  if (Src1MBB->succ_size() == 0) {
1083  MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1084  if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1085  MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1086  if (TheEntry) {
1087  LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
1088  << Src1MBB->getNumber() << " src2 = BB"
1089  << Src2MBB->getNumber() << "\n";);
1090  return true;
1091  }
1092  }
1093  }
1094  return false;
1095 }
1096 
1097 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1098  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1099  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1100  if (Num == 0) {
1101  LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
1102  << "\n";);
1103  Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1104  }
1105  return Num;
1106 }
1107 
int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
  // Repair a "jump into if": walk down from TrueMBB along its unique
  // successor chain until a block is found that FalseMBB reaches on a
  // single path; clone side-entered blocks on both arms so the region
  // becomes a proper if-then-else, then re-run pattern matching on HeadMBB.
  // Returns the total number of matches/clones performed (0 = no candidate
  // merge point found).
  int Num = 0;
  MachineBasicBlock *DownBlk;

  //trueBlk could be the common post dominator
  DownBlk = TrueMBB;

  LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
                    << " true = BB" << TrueMBB->getNumber()
                    << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
                    << FalseMBB->getNumber() << "\n";);

  while (DownBlk) {
    LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););

    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
      LLVM_DEBUG(dbgs() << " working\n";);

      // Detach side entries on both arms so HeadMBB owns its paths down
      // to DownBlk exclusively.
      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);

      numClonedBlock += Num;
      // Re-reduce the region now that the CFG has been cleaned up.
      Num += serialPatternMatch(*HeadMBB->succ_begin());
      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
      Num += ifPatternMatch(HeadMBB);
      assert(Num > 0);

      break;
    }
    LLVM_DEBUG(dbgs() << " not working\n";);
    // Follow the unique successor chain; give up when DownBlk branches.
    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
  } // walk down the postDomTree

  return Num;
}
1144 
1145 #ifndef NDEBUG
1146 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1147  MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1148  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1149  dbgs() << "head = BB" << HeadMBB->getNumber()
1150  << " size = " << HeadMBB->size();
1151  if (Detail) {
1152  dbgs() << "\n";
1153  HeadMBB->print(dbgs());
1154  dbgs() << "\n";
1155  }
1156 
1157  if (TrueMBB) {
1158  dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1159  << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1160  if (Detail) {
1161  dbgs() << "\n";
1162  TrueMBB->print(dbgs());
1163  dbgs() << "\n";
1164  }
1165  }
1166  if (FalseMBB) {
1167  dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1168  << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1169  if (Detail) {
1170  dbgs() << "\n";
1171  FalseMBB->print(dbgs());
1172  dbgs() << "\n";
1173  }
1174  }
1175  if (LandMBB) {
1176  dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1177  << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1178  if (Detail) {
1179  dbgs() << "\n";
1180  LandMBB->print(dbgs());
1181  dbgs() << "\n";
1182  }
1183  }
1184 
1185  dbgs() << "\n";
1186 }
1187 #endif
1188 
// Try to turn a "branch into if" region into a reducible if-then-else by
// migrating the branched-into block(s) into the landing block, guarded by a
// freshly created init register.  Several sub-cases are not implemented and
// abort compilation via report_fatal_error instead.  Returns the number of
// new blocks created (0 if nothing was changed).
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    MachineBasicBlock **LandMBBPtr) {
  bool MigrateTrue = false;
  bool MigrateFalse = false;

  MachineBasicBlock *LandBlk = *LandMBBPtr;

  // Arms must already be reduced to at most one successor each.
  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
         && (!FalseMBB || FalseMBB->succ_size() <= 1));

  if (TrueMBB == FalseMBB)
    return 0;

  MigrateTrue = needMigrateBlock(TrueMBB);
  MigrateFalse = needMigrateBlock(FalseMBB);

  if (!MigrateTrue && !MigrateFalse)
    return 0;

  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
  // have more than one predecessors.  without doing this, its predecessor
  // rather than headBlk will have undefined value in initReg.
  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
    MigrateTrue = true;
  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
    MigrateFalse = true;

  LLVM_DEBUG(
      dbgs() << "before improveSimpleJumpintoIf: ";
      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););

  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
  //
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
  //      {initReg = 0; org falseBlk branch }
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
  //      => org landBlk
  //      if landBlk->pred_size() > 2, put the about if-else inside
  //      if (initReg !=2) {...}
  //
  // add initReg = initVal to headBlk

  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  if (!MigrateTrue || !MigrateFalse) {
    // XXX: We have an opportunity here to optimize the "branch into if" case
    // here.  Branch into if looks like this:
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //             /      \           |
    // diamond_false        diamond_true
    //             \      /
    //               done
    //
    // The diamond_head block begins the "if" and the diamond_true block
    // is the block being "branched into".
    //
    // If MigrateTrue is true, then TrueBB is the block being "branched into"
    // and if MigrateFalse is true, then FalseBB is the block being
    // "branched into"
    //
    // Here is the pseudo code for how I think the optimization should work:
    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
    // 3. Move the branch instruction from diamond_head into its own basic
    //    block (new_block).
    // 4. Add an unconditional branch from diamond_head to new_block
    // 5. Replace the branch instruction in branch_from with an unconditional
    //    branch to new_block.  If branch_from has multiple predecessors, then
    //    we need to replace the True/False block in the branch
    //    instruction instead of replacing it.
    // 6. Change the condition of the branch instruction in new_block from
    //    COND to (COND || GPR0)
    //
    // In order insert these MOV instruction, we will need to use the
    // RegisterScavenger.  Usually liveness stops being tracked during
    // the late machine optimization passes, however if we implement
    // bool TargetRegisterInfo::requiresRegisterScavenging(
    //                                                const MachineFunction &MF)
    // and have it return true, liveness will be tracked correctly
    // by generic optimization passes.  We will also need to make sure that
    // all of our target-specific passes that run after regalloc and before
    // the CFGStructurizer track liveness and we will need to modify this pass
    // to correctly track liveness.
    //
    // After the above changes, the new CFG should look like this:
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //                       \     /
    //                      new_block
    //                      /      |
    //         diamond_false        diamond_true
    //                      \      /
    //                        done
    //
    // Without this optimization, we are forced to duplicate the diamond_true
    // block and we will end up with a CFG like this:
    //
    //                        entry
    //                       /     |
    //           diamond_head       branch_from
    //             /      \                   |
    // diamond_false        diamond_true      diamond_true (duplicate)
    //             \      /                   |
    //               done --------------------|
    //
    // Duplicating diamond_true can be very costly especially if it has a
    // lot of instructions.
    return 0;
  }

  int NumNewBlk = 0;

  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);

  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);

  if (LandBlkHasOtherPred) {
    // Unimplemented case: report_fatal_error does not return, so the code
    // below this call is dead.
    report_fatal_error("Extra register needed to handle CFG");
    unsigned CmpResReg =
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
    report_fatal_error("Extra compare instruction needed to handle CFG");
    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
        CmpResReg, DebugLoc());
  }

  // XXX: We are running this after RA, so creating virtual registers will
  // cause an assertion failure in the PostRA scheduling pass.
  unsigned InitReg =
    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
      DebugLoc());

  if (MigrateTrue) {
    migrateInstruction(TrueMBB, LandBlk, I);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 1).
    report_fatal_error("Extra register needed to handle CFG");
  }
  insertInstrBefore(I, AMDGPU::ELSE);

  if (MigrateFalse) {
    migrateInstruction(FalseMBB, LandBlk, I);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    // (initVal != 0)
    report_fatal_error("Extra register needed to handle CFG");
  }

  if (LandBlkHasOtherPred) {
    // add endif
    insertInstrBefore(I, AMDGPU::ENDIF);

    // put initReg = 2 to other predecessors of landBlk
    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
         PE = LandBlk->pred_end(); PI != PE; ++PI) {
      MachineBasicBlock *MBB = *PI;
      if (MBB != TrueMBB && MBB != FalseMBB)
        report_fatal_error("Extra register needed to handle CFG");
    }
  }
  LLVM_DEBUG(
      dbgs() << "result from improveSimpleJumpintoIf: ";
      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););

  // update landBlk
  *LandMBBPtr = LandBlk;

  return NumNewBlk;
}
1363 
void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
    MachineBasicBlock *SrcMBB) {
  // Fold SrcMBB into DstMBB for the serial pattern DstMBB -> SrcMBB: move
  // all of SrcMBB's instructions to the end of DstMBB, give DstMBB SrcMBB's
  // successors, then disconnect and retire the now-empty SrcMBB.
  LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
                    << SrcMBB->getNumber() << "\n";);
  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());

  DstMBB->removeSuccessor(SrcMBB, true);
  cloneSuccessorList(DstMBB, SrcMBB);

  removeSuccessor(SrcMBB);
  MLI->removeBlock(SrcMBB);
  retireBlock(SrcMBB);
}
1377 
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
  // Collapse a matched if/if-else region into MBB using the structured
  // pseudo instructions, splicing the arm blocks inline and retiring them.
  // TrueMBB must exist; FalseMBB and LandMBB may be null.
  assert (TrueMBB);
  LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{  ";
             if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
             << "  } else ";
             dbgs() << "{  "; if (FalseMBB) {
               dbgs() << "BB" << FalseMBB->getNumber();
             } dbgs() << "  }\n ";
             dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
               dbgs() << "BB" << LandMBB->getNumber();
             } dbgs() << "\n";);

  // Capture opcode/debug-loc before the original branch is erased below.
  int OldOpcode = BranchMI->getOpcode();
  DebugLoc BranchDL = BranchMI->getDebugLoc();

// transform to
// if cond
//   trueBlk
// else
//   falseBlk
// endif
// landBlk

  MachineBasicBlock::iterator I = BranchMI;
  // Structured IF on the non-zero condition replaces the CFG branch.
  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
      BranchDL);

  if (TrueMBB) {
    // Inline the true arm and detach it from the CFG.
    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
    MBB->removeSuccessor(TrueMBB, true);
    if (LandMBB && TrueMBB->succ_size()!=0)
      TrueMBB->removeSuccessor(LandMBB, true);
    retireBlock(TrueMBB);
    MLI->removeBlock(TrueMBB);
  }

  if (FalseMBB) {
    // Inline the false arm behind a structured ELSE.
    insertInstrBefore(I, AMDGPU::ELSE);
    MBB->splice(I, FalseMBB, FalseMBB->begin(),
                FalseMBB->end());
    MBB->removeSuccessor(FalseMBB, true);
    if (LandMBB && FalseMBB->succ_size() != 0)
      FalseMBB->removeSuccessor(LandMBB, true);
    retireBlock(FalseMBB);
    MLI->removeBlock(FalseMBB);
  }
  insertInstrBefore(I, AMDGPU::ENDIF);

  BranchMI->eraseFromParent();

  // Only the full diamond removed both edges to LandMBB above; restore the
  // single fall-through edge from the merged block.
  if (LandMBB && TrueMBB && FalseMBB)
    MBB->addSuccessor(LandMBB);
}
1433 
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
    MachineBasicBlock *LandMBB) {
  // Wrap the fully reduced loop body in DstBlk with structured
  // WHILELOOP ... ENDLOOP markers, then retarget the header's self-loop
  // edge to the landing block.
  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
                    << " land = BB" << LandMBB->getNumber() << "\n";);

  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
  DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
1443 
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
    MachineBasicBlock *LandMBB) {
  // Rewrite the conditional branch of a loop-exiting block into the
  // structured IF_PREDICATE_SET / BREAK / ENDIF sequence and drop the CFG
  // edge to LandMBB.
  LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
                    << ExitingMBB->getNumber() << " land = BB"
                    << LandMBB->getNumber() << "\n";);
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
  assert(BranchMI && isCondBranch(BranchMI));
  DebugLoc DL = BranchMI->getDebugLoc();
  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
  MachineBasicBlock::iterator I = BranchMI;
  // The BREAK must be on the taken path: if the exit is on the false edge,
  // invert the predicate so "taken" means "leave the loop".
  if (TrueBranch != LandMBB)
    reversePredicateSetter(I, *I->getParent());
  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
  insertInstrBefore(I, AMDGPU::BREAK);
  insertInstrBefore(I, AMDGPU::ENDIF);
  //now branchInst can be erase safely
  BranchMI->eraseFromParent();
  //now take care of successors, retire blocks
  ExitingMBB->removeSuccessor(LandMBB, true);
}
1464 
1465 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1466  MachineBasicBlock *ContMBB) {
1467  LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1468  << ContingMBB->getNumber() << ", cont = BB"
1469  << ContMBB->getNumber() << "\n";);
1470 
1471  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1472  if (MI) {
1473  assert(isCondBranch(MI));
1475  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1476  int OldOpcode = MI->getOpcode();
1477  DebugLoc DL = MI->getDebugLoc();
1478 
1479  bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1480 
1481  if (!UseContinueLogical) {
1482  int BranchOpcode =
1483  TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1484  getBranchZeroOpcode(OldOpcode);
1485  insertCondBranchBefore(I, BranchOpcode, DL);
1486  // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1487  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1488  insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1489  } else {
1490  int BranchOpcode =
1491  TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1492  getContinueZeroOpcode(OldOpcode);
1493  insertCondBranchBefore(I, BranchOpcode, DL);
1494  }
1495 
1496  MI->eraseFromParent();
1497  } else {
1498  // if we've arrived here then we've already erased the branch instruction
1499  // travel back up the basic block to see the last reference of our debug
1500  // location we've just inserted that reference here so it should be
1501  // representative insertEnd to ensure phi-moves, if exist, go before the
1502  // continue-instr.
1503  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1504  getLastDebugLocInBB(ContingMBB));
1505  }
1506 }
1507 
1508 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1509  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1510  int Cloned = 0;
1511  assert(PreMBB->isSuccessor(SrcMBB));
1512  while (SrcMBB && SrcMBB != DstMBB) {
1513  assert(SrcMBB->succ_size() == 1);
1514  if (SrcMBB->pred_size() > 1) {
1515  SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1516  ++Cloned;
1517  }
1518 
1519  PreMBB = SrcMBB;
1520  SrcMBB = *SrcMBB->succ_begin();
1521  }
1522 
1523  return Cloned;
1524 }
1525 
1527 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1528  MachineBasicBlock *PredMBB) {
1529  assert(PredMBB->isSuccessor(MBB) &&
1530  "succBlk is not a prececessor of curBlk");
1531 
1532  MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
1533  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1534  //srcBlk, oldBlk, newBlk
1535 
1536  PredMBB->replaceSuccessor(MBB, CloneMBB);
1537 
1538  // add all successor to cloneBlk
1539  cloneSuccessorList(CloneMBB, MBB);
1540 
1541  numClonedInstr += MBB->size();
1542 
1543  LLVM_DEBUG(dbgs() << "Cloned block: "
1544  << "BB" << MBB->getNumber() << "size " << MBB->size()
1545  << "\n";);
1546 
1547  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1548 
1549  return CloneMBB;
1550 }
1551 
1552 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1554  MachineBasicBlock::iterator SpliceEnd;
1555  //look for the input branchinstr, not the AMDGPU branchinstr
1556  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1557  if (!BranchMI) {
1558  LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
1559  SpliceEnd = SrcMBB->end();
1560  } else {
1561  LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1562  SpliceEnd = BranchMI;
1563  }
1564  LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
1565  << DstMBB->size() << "srcSize = " << SrcMBB->size()
1566  << "\n";);
1567 
1568  //splice insert before insertPos
1569  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1570 
1571  LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
1572  << DstMBB->size() << "srcSize = " << SrcMBB->size()
1573  << '\n';);
1574 }
1575 
1577 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1578  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1579  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1580 
1581  if (!LoopHeader || !LoopLatch)
1582  return nullptr;
1583  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1584  // Is LoopRep an infinite loop ?
1585  if (!BranchMI || !isUncondBranch(BranchMI))
1586  return nullptr;
1587 
1588  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1589  FuncRep->push_back(DummyExitBlk); //insert to function
1590  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1591  LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1592  LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
1593  Ctx.emitError("Extra register needed to handle CFG");
1594  return nullptr;
1595 }
1596 
1597 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1598  MachineInstr *BranchMI;
1599 
1600  // I saw two unconditional branch in one basic block in example
1601  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1602  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1603  && isUncondBranch(BranchMI)) {
1604  LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1605  BranchMI->eraseFromParent();
1606  }
1607 }
1608 
1609 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1610  MachineBasicBlock *MBB) {
1611  if (MBB->succ_size() != 2)
1612  return;
1613  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1614  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1615  if (MBB1 != MBB2)
1616  return;
1617 
1618  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1619  assert(BranchMI && isCondBranch(BranchMI));
1620  LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1621  BranchMI->eraseFromParent();
1622  SHOWNEWBLK(MBB1, "Removing redundant successor");
1623  MBB->removeSuccessor(MBB1, true);
1624 }
1625 
1626 void AMDGPUCFGStructurizer::addDummyExitBlock(
1628  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1629  FuncRep->push_back(DummyExitBlk); //insert to function
1630  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
1631 
1633  E = RetMBB.end(); It != E; ++It) {
1634  MachineBasicBlock *MBB = *It;
1635  MachineInstr *MI = getReturnInstr(MBB);
1636  if (MI)
1637  MI->eraseFromParent();
1638  MBB->addSuccessor(DummyExitBlk);
1639  LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1640  << " successors\n";);
1641  }
1642  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1643 }
1644 
1645 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1646  while (MBB->succ_size())
1647  MBB->removeSuccessor(*MBB->succ_begin());
1648 }
1649 
1650 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1651  int SccNum) {
1652  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1653  if (!srcBlkInfo)
1654  srcBlkInfo = new BlockInformation();
1655  srcBlkInfo->SccNum = SccNum;
1656 }
1657 
void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
  // Mark MBB as retired in its BlockInformation record.  The block is
  // expected to be fully disconnected already (no predecessors or
  // successors), which the trailing assert checks.
  LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);

  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];

  // Lazily create the info record if this block was never seen before.
  if (!SrcBlkInfo)
    SrcBlkInfo = new BlockInformation();

  SrcBlkInfo->IsRetired = true;
  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
         && "can't retire block yet");
}
1670 
1671 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1672  "AMDGPU CFG Structurizer", false, false)
1676 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1678 
1680  return new AMDGPUCFGStructurizer();
1681 }
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", "AMDGPU CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUCFGStructurizer
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:328
MachineBasicBlock * getMBB() const
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:115
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:137
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:285
AMDGPU CFG Structurizer
unsigned getReg() const
getReg - Returns the register number.
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
FunctionPass * createAMDGPUCFGStructurizerPass()
Interface definition for R600RegisterInfo.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
#define SHOWNEWINSTR(i)
void initializeAMDGPUCFGStructurizerPass(PassRegistry &)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
static int getSCCNum(const BasicBlock *BB, const BranchProbabilityInfo::SccInfo &SccI)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:311
BlockT * getHeader() const
Definition: LoopInfo.h:100
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:226
void getExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:63
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
std::vector< MachineLoop *>::const_iterator iterator
Definition: LoopInfo.h:139
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_iterator rend()
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
reverse_iterator rbegin()
iterator begin() const
#define INVALIDSCCNUM
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:117
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
#define SHOWNEWBLK(b, msg)
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
std::vector< MachineBasicBlock * >::iterator pred_iterator
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:35
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
size_type size() const
Definition: SmallPtrSet.h:93
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
iterator end() const
unsigned succ_size() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:156
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:60
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:121
Flatten the CFG
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
Definition: GraphTraits.h:102
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
The iterator interface to the top-level loops in the current function.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator_range< df_iterator< T > > depth_first(const T &G)
bool isEmpty() const
isEmpty - Return true if there are no jump tables.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void push_back(MachineBasicBlock *MBB)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
static uint32_t blk(uint32_t *Buf, int I)
Definition: SHA1.cpp:36
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
#define LLVM_DEBUG(X)
Definition: Debug.h:119
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:43