LLVM  14.0.0git
AMDILCFGStructurizer.cpp
Go to the documentation of this file.
1 //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
10 #include "R600.h"
11 #include "R600RegisterInfo.h"
12 #include "R600Subtarget.h"
13 #include "llvm/ADT/SCCIterator.h"
14 #include "llvm/ADT/Statistic.h"
20 #include "llvm/InitializePasses.h"
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "structcfg"
25 
26 #define DEFAULT_VEC_SLOTS 8
27 
28 // TODO: move-begin.
29 
30 //===----------------------------------------------------------------------===//
31 //
32 // Statistics for CFGStructurizer.
33 //
34 //===----------------------------------------------------------------------===//
35 
36 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
37  "matched");
38 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
39  "matched");
40 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
41 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
42 
43 namespace llvm {
44 
46 
47 } // end namespace llvm
48 
49 namespace {
50 
51 //===----------------------------------------------------------------------===//
52 //
53 // Miscellaneous utility for CFGStructurizer.
54 //
55 //===----------------------------------------------------------------------===//
56 
57 #define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
58 
59 #define SHOWNEWBLK(b, msg) \
60  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
61  dbgs() << "\n";);
62 
63 #define SHOWBLK_DETAIL(b, msg) \
64  LLVM_DEBUG(if (b) { \
65  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
66  b->print(dbgs()); \
67  dbgs() << "\n"; \
68  });
69 
70 #define INVALIDSCCNUM -1
71 
72 //===----------------------------------------------------------------------===//
73 //
74 // supporting data structure for CFGStructurizer
75 //
76 //===----------------------------------------------------------------------===//
77 
78 class BlockInformation {
79 public:
80  bool IsRetired = false;
81  int SccNum = INVALIDSCCNUM;
82 
83  BlockInformation() = default;
84 };
85 
86 //===----------------------------------------------------------------------===//
87 //
88 // CFGStructurizer
89 //
90 //===----------------------------------------------------------------------===//
91 
92 class AMDGPUCFGStructurizer : public MachineFunctionPass {
93 public:
95  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
96  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
97 
98  enum PathToKind {
99  Not_SinglePath = 0,
100  SinglePath_InPath = 1,
101  SinglePath_NotInPath = 2
102  };
103 
104  static char ID;
105 
106  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
108  }
109 
110  StringRef getPassName() const override {
111  return "AMDGPU Control Flow Graph structurizer Pass";
112  }
113 
114  void getAnalysisUsage(AnalysisUsage &AU) const override {
119  }
120 
121  /// Perform the CFG structurization
122  bool run();
123 
124  /// Perform the CFG preparation
125  /// This step will remove every unconditionnal/dead jump instructions and make
126  /// sure all loops have an exit block
127  bool prepare();
128 
129  bool runOnMachineFunction(MachineFunction &MF) override {
130  // FIXME: This pass causes verification failures.
131  MF.getProperties().set(
133 
134  TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
135  TRI = &TII->getRegisterInfo();
136  LLVM_DEBUG(MF.dump(););
137  OrderedBlks.clear();
138  Visited.clear();
139  FuncRep = &MF;
140  MLI = &getAnalysis<MachineLoopInfo>();
141  LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
142  MDT = &getAnalysis<MachineDominatorTree>();
143  LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
144  PDT = &getAnalysis<MachinePostDominatorTree>();
145  LLVM_DEBUG(PDT->print(dbgs()););
146  prepare();
147  run();
148  LLVM_DEBUG(MF.dump(););
149  return true;
150  }
151 
152 protected:
155  MachineLoopInfo *MLI;
156  const R600InstrInfo *TII = nullptr;
157  const R600RegisterInfo *TRI = nullptr;
158 
159  // PRINT FUNCTIONS
160  /// Print the ordered Blocks.
161  void printOrderedBlocks() const {
162  size_t i = 0;
163  for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
164  iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
165  dbgs() << "BB" << (*iterBlk)->getNumber();
166  dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
167  if (i != 0 && i % 10 == 0) {
168  dbgs() << "\n";
169  } else {
170  dbgs() << " ";
171  }
172  }
173  }
174 
175  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
176  for (const MachineLoop *L : LoopInfo)
177  L->print(dbgs());
178  }
179 
180  // UTILITY FUNCTIONS
181  int getSCCNum(MachineBasicBlock *MBB) const;
182  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
183  bool hasBackEdge(MachineBasicBlock *MBB) const;
184  bool isRetiredBlock(MachineBasicBlock *MBB) const;
185  bool isActiveLoophead(MachineBasicBlock *MBB) const;
186  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
187  bool AllowSideEntry = true) const;
188  int countActiveBlock(MBBVector::const_iterator It,
190  bool needMigrateBlock(MachineBasicBlock *MBB) const;
191 
192  // Utility Functions
193  void reversePredicateSetter(MachineBasicBlock::iterator I,
195  /// Compute the reversed DFS post order of Blocks
196  void orderBlocks(MachineFunction *MF);
197 
198  // Function originally from CFGStructTraits
199  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
200  const DebugLoc &DL = DebugLoc());
201  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
202  const DebugLoc &DL = DebugLoc());
203  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
204  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
205  const DebugLoc &DL);
206  void insertCondBranchBefore(MachineBasicBlock *MBB,
207  MachineBasicBlock::iterator I, int NewOpcode,
208  int RegNum, const DebugLoc &DL);
209 
210  static int getBranchNzeroOpcode(int OldOpcode);
211  static int getBranchZeroOpcode(int OldOpcode);
212  static int getContinueNzeroOpcode(int OldOpcode);
213  static int getContinueZeroOpcode(int OldOpcode);
214  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
215  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
216  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
217  MachineInstr *MI);
218  static bool isCondBranch(MachineInstr *MI);
219  static bool isUncondBranch(MachineInstr *MI);
220  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
221  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
222 
223  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
224  ///
225  /// BB with backward-edge could have move instructions after the branch
226  /// instruction. Such move instruction "belong to" the loop backward-edge.
227  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
228 
229  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
230  static bool isReturnBlock(MachineBasicBlock *MBB);
231  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
232  MachineBasicBlock *SrcMBB);
233  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
234 
235  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
236  /// because the AMDGPU instruction is not recognized as terminator fix this
237  /// and retire this routine
238  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
239  MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
240 
241  static void wrapup(MachineBasicBlock *MBB);
242 
243  int patternMatch(MachineBasicBlock *MBB);
244  int patternMatchGroup(MachineBasicBlock *MBB);
245  int serialPatternMatch(MachineBasicBlock *MBB);
246  int ifPatternMatch(MachineBasicBlock *MBB);
247  int loopendPatternMatch();
248  int mergeLoop(MachineLoop *LoopRep);
249 
250  /// return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in
251  /// the same loop with LoopLandInfo without explicitly keeping track of
252  /// loopContBlks and loopBreakBlks, this is a method to get the information.
253  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
254  MachineBasicBlock *Src2MBB);
255  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
256  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
257  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
258  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
259  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
260  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
261  MachineBasicBlock **LandMBBPtr);
262  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
263  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
264  MachineBasicBlock *LandMBB, bool Detail = false);
265  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
266  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
267  void mergeSerialBlock(MachineBasicBlock *DstMBB,
268  MachineBasicBlock *SrcMBB);
269 
270  void mergeIfthenelseBlock(MachineInstr *BranchMI,
272  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
273  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
274  MachineBasicBlock *LandMBB);
275  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
276  MachineBasicBlock *LandMBB);
277  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
278  MachineBasicBlock *ContMBB);
279 
280  /// normalizeInfiniteLoopExit change
281  /// B1:
282  /// uncond_br LoopHeader
283  ///
284  /// to
285  /// B1:
286  /// cond_br 1 LoopHeader dummyExit
287  /// and return the newly added dummy exit block
288  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
289  void removeUnconditionalBranch(MachineBasicBlock *MBB);
290 
291  /// Remove duplicate branches instructions in a block.
292  /// For instance
293  /// B0:
294  /// cond_br X B1 B2
295  /// cond_br X B1 B2
296  /// is transformed to
297  /// B0:
298  /// cond_br X B1 B2
299  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
300 
301  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
302  void removeSuccessor(MachineBasicBlock *MBB);
303  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
304  MachineBasicBlock *PredMBB);
305  void migrateInstruction(MachineBasicBlock *SrcMBB,
307  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
308  void retireBlock(MachineBasicBlock *MBB);
309 
310 private:
311  MBBInfoMap BlockInfoMap;
312  LoopLandInfoMap LLInfoMap;
313  std::map<MachineLoop *, bool> Visited;
314  MachineFunction *FuncRep;
316 };
317 
318 } // end anonymous namespace
319 
321 
322 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
323  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
324  if (It == BlockInfoMap.end())
325  return INVALIDSCCNUM;
326  return (*It).second->SccNum;
327 }
328 
329 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
330  const {
331  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
332  if (It == LLInfoMap.end())
333  return nullptr;
334  return (*It).second;
335 }
336 
337 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
338  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
339  if (!LoopRep)
340  return false;
341  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
342  return MBB->isSuccessor(LoopHeader);
343 }
344 
345 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
346  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
347  if (It == BlockInfoMap.end())
348  return false;
349  return (*It).second->IsRetired;
350 }
351 
352 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
353  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
354  while (LoopRep && LoopRep->getHeader() == MBB) {
355  MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
356  if(!LoopLand)
357  return true;
358  if (!isRetiredBlock(LoopLand))
359  return true;
360  LoopRep = LoopRep->getParentLoop();
361  }
362  return false;
363 }
364 
365 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
366  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
367  bool AllowSideEntry) const {
368  assert(DstMBB);
369  if (SrcMBB == DstMBB)
370  return SinglePath_InPath;
371  while (SrcMBB && SrcMBB->succ_size() == 1) {
372  SrcMBB = *SrcMBB->succ_begin();
373  if (SrcMBB == DstMBB)
374  return SinglePath_InPath;
375  if (!AllowSideEntry && SrcMBB->pred_size() > 1)
376  return Not_SinglePath;
377  }
378  if (SrcMBB && SrcMBB->succ_size()==0)
379  return SinglePath_NotInPath;
380  return Not_SinglePath;
381 }
382 
383 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
385  int Count = 0;
386  while (It != E) {
387  if (!isRetiredBlock(*It))
388  ++Count;
389  ++It;
390  }
391  return Count;
392 }
393 
394 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
395  unsigned BlockSizeThreshold = 30;
396  unsigned CloneInstrThreshold = 100;
397  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
398 
399  if(!MultiplePreds)
400  return false;
401  unsigned BlkSize = MBB->size();
402  return ((BlkSize > BlockSizeThreshold) &&
403  (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
404 }
405 
406 void AMDGPUCFGStructurizer::reversePredicateSetter(
408  assert(I.isValid() && "Expected valid iterator");
409  for (;; --I) {
410  if (I == MBB.end())
411  continue;
412  if (I->getOpcode() == R600::PRED_X) {
413  switch (I->getOperand(2).getImm()) {
414  case R600::PRED_SETE_INT:
415  I->getOperand(2).setImm(R600::PRED_SETNE_INT);
416  return;
417  case R600::PRED_SETNE_INT:
418  I->getOperand(2).setImm(R600::PRED_SETE_INT);
419  return;
420  case R600::PRED_SETE:
421  I->getOperand(2).setImm(R600::PRED_SETNE);
422  return;
423  case R600::PRED_SETNE:
424  I->getOperand(2).setImm(R600::PRED_SETE);
425  return;
426  default:
427  llvm_unreachable("PRED_X Opcode invalid!");
428  }
429  }
430  }
431 }
432 
433 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
434  int NewOpcode, const DebugLoc &DL) {
435  MachineInstr *MI =
436  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
437  MBB->push_back(MI);
438  //assume the instruction doesn't take any reg operand ...
439  SHOWNEWINSTR(MI);
440 }
441 
442 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
443  int NewOpcode,
444  const DebugLoc &DL) {
445  MachineInstr *MI =
446  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
447  if (!MBB->empty())
448  MBB->insert(MBB->begin(), MI);
449  else
450  MBB->push_back(MI);
451  SHOWNEWINSTR(MI);
452  return MI;
453 }
454 
455 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
456  MachineBasicBlock::iterator I, int NewOpcode) {
457  MachineInstr *OldMI = &(*I);
458  MachineBasicBlock *MBB = OldMI->getParent();
459  MachineInstr *NewMBB =
460  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
461  MBB->insert(I, NewMBB);
462  //assume the instruction doesn't take any reg operand ...
463  SHOWNEWINSTR(NewMBB);
464  return NewMBB;
465 }
466 
467 void AMDGPUCFGStructurizer::insertCondBranchBefore(
468  MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
469  MachineInstr *OldMI = &(*I);
470  MachineBasicBlock *MBB = OldMI->getParent();
471  MachineFunction *MF = MBB->getParent();
472  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
473  MBB->insert(I, NewMI);
474  MachineInstrBuilder MIB(*MF, NewMI);
475  MIB.addReg(OldMI->getOperand(1).getReg(), false);
476  SHOWNEWINSTR(NewMI);
477  //erase later oldInstr->eraseFromParent();
478 }
479 
480 void AMDGPUCFGStructurizer::insertCondBranchBefore(
482  int RegNum, const DebugLoc &DL) {
483  MachineFunction *MF = blk->getParent();
484  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
485  //insert before
486  blk->insert(I, NewInstr);
487  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
488  SHOWNEWINSTR(NewInstr);
489 }
490 
491 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
492  switch(OldOpcode) {
493  case R600::JUMP_COND:
494  case R600::JUMP: return R600::IF_PREDICATE_SET;
495  case R600::BRANCH_COND_i32:
496  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
497  default: llvm_unreachable("internal error");
498  }
499  return -1;
500 }
501 
502 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
503  switch(OldOpcode) {
504  case R600::JUMP_COND:
505  case R600::JUMP: return R600::IF_PREDICATE_SET;
506  case R600::BRANCH_COND_i32:
507  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
508  default: llvm_unreachable("internal error");
509  }
510  return -1;
511 }
512 
513 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
514  switch(OldOpcode) {
515  case R600::JUMP_COND:
516  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
517  default: llvm_unreachable("internal error");
518  }
519  return -1;
520 }
521 
522 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
523  switch(OldOpcode) {
524  case R600::JUMP_COND:
525  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
526  default: llvm_unreachable("internal error");
527  }
528  return -1;
529 }
530 
531 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
532  return MI->getOperand(0).getMBB();
533 }
534 
535 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
537  MI->getOperand(0).setMBB(MBB);
538 }
539 
541 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
542  MachineInstr *MI) {
543  assert(MBB->succ_size() == 2);
544  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
547  ++Next;
548  return (*It == TrueBranch) ? *Next : *It;
549 }
550 
551 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
552  switch (MI->getOpcode()) {
553  case R600::JUMP_COND:
554  case R600::BRANCH_COND_i32:
555  case R600::BRANCH_COND_f32: return true;
556  default:
557  return false;
558  }
559  return false;
560 }
561 
562 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
563  switch (MI->getOpcode()) {
564  case R600::JUMP:
565  case R600::BRANCH:
566  return true;
567  default:
568  return false;
569  }
570  return false;
571 }
572 
573 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
574  //get DebugLoc from the first MachineBasicBlock instruction with debug info
575  DebugLoc DL;
576  for (MachineInstr &MI : *MBB)
577  if (MI.getDebugLoc())
578  DL = MI.getDebugLoc();
579  return DL;
580 }
581 
582 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
585  MachineInstr *MI = &*It;
586  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
587  return MI;
588  return nullptr;
589 }
590 
591 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
594  It != E; ++It) {
595  // FIXME: Simplify
596  MachineInstr *MI = &*It;
597  if (MI) {
598  if (isCondBranch(MI) || isUncondBranch(MI))
599  return MI;
600  else if (!TII->isMov(MI->getOpcode()))
601  break;
602  }
603  }
604  return nullptr;
605 }
606 
607 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
609  if (It != MBB->rend()) {
610  MachineInstr *instr = &(*It);
611  if (instr->getOpcode() == R600::RETURN)
612  return instr;
613  }
614  return nullptr;
615 }
616 
617 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
618  MachineInstr *MI = getReturnInstr(MBB);
619  bool IsReturn = MBB->succ_empty();
620  if (MI)
621  assert(IsReturn);
622  else if (IsReturn)
623  LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
624  << " is return block without RETURN instr\n";);
625  return IsReturn;
626 }
627 
628 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
629  MachineBasicBlock *SrcMBB) {
630  for (MachineBasicBlock *Succ : SrcMBB->successors())
631  DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of
632 }
633 
634 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
636  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
637  Func->push_back(NewMBB); //insert to function
638  for (const MachineInstr &It : *MBB)
639  NewMBB->push_back(Func->CloneMachineInstr(&It));
640  return NewMBB;
641 }
642 
643 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
644  MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
645  MachineBasicBlock *NewBlk) {
646  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
647  if (BranchMI && isCondBranch(BranchMI) &&
648  getTrueBranch(BranchMI) == OldMBB)
649  setTrueBranch(BranchMI, NewBlk);
650 }
651 
652 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
655  && "found a jump table");
656 
657  //collect continue right before endloop
662  while (It != E) {
663  if (Pre->getOpcode() == R600::CONTINUE
664  && It->getOpcode() == R600::ENDLOOP)
665  ContInstr.push_back(&*Pre);
666  Pre = It;
667  ++It;
668  }
669 
670  //delete continue right before endloop
671  for (unsigned i = 0; i < ContInstr.size(); ++i)
672  ContInstr[i]->eraseFromParent();
673 
674  // TODO to fix up jump table so later phase won't be confused. if
675  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
676  // there isn't such an interface yet. alternatively, replace all the other
677  // blocks in the jump table with the entryBlk //}
678 }
679 
680 bool AMDGPUCFGStructurizer::prepare() {
681  bool Changed = false;
682 
683  //FIXME: if not reducible flow graph, make it so ???
684 
685  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
686 
687  orderBlocks(FuncRep);
688 
690 
691  // Add an ExitBlk to loop that don't have one
692  for (MachineLoop *LoopRep : *MLI) {
693  MBBVector ExitingMBBs;
694  LoopRep->getExitingBlocks(ExitingMBBs);
695 
696  if (ExitingMBBs.size() == 0) {
697  MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
698  if (DummyExitBlk)
699  RetBlks.push_back(DummyExitBlk);
700  }
701  }
702 
703  // Remove unconditional branch instr.
704  // Add dummy exit block iff there are multiple returns.
705  for (MachineBasicBlock *MBB : OrderedBlks) {
706  removeUnconditionalBranch(MBB);
707  removeRedundantConditionalBranch(MBB);
708  if (isReturnBlock(MBB)) {
709  RetBlks.push_back(MBB);
710  }
711  assert(MBB->succ_size() <= 2);
712  }
713 
714  if (RetBlks.size() >= 2) {
715  addDummyExitBlock(RetBlks);
716  Changed = true;
717  }
718 
719  return Changed;
720 }
721 
722 bool AMDGPUCFGStructurizer::run() {
723  //Assume reducible CFG...
724  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
725 
726 #ifdef STRESSTEST
727  //Use the worse block ordering to test the algorithm.
728  ReverseVector(orderedBlks);
729 #endif
730 
731  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
732  int NumIter = 0;
733  bool Finish = false;
735  bool MakeProgress = false;
736  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
737  OrderedBlks.end());
738 
739  do {
740  ++NumIter;
741  LLVM_DEBUG(dbgs() << "numIter = " << NumIter
742  << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
743 
745  OrderedBlks.begin();
747  OrderedBlks.end();
748 
750  It;
751  MachineBasicBlock *SccBeginMBB = nullptr;
752  int SccNumBlk = 0; // The number of active blocks, init to a
753  // maximum possible number.
754  int SccNumIter; // Number of iteration in this SCC.
755 
756  while (It != E) {
757  MBB = *It;
758 
759  if (!SccBeginMBB) {
760  SccBeginIter = It;
761  SccBeginMBB = MBB;
762  SccNumIter = 0;
763  SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
764  LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
765  dbgs() << "\n";);
766  }
767 
768  if (!isRetiredBlock(MBB))
769  patternMatch(MBB);
770 
771  ++It;
772 
773  bool ContNextScc = true;
774  if (It == E
775  || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
776  // Just finish one scc.
777  ++SccNumIter;
778  int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
779  if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
780  LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
781  << ", sccNumIter = " << SccNumIter;
782  dbgs() << "doesn't make any progress\n";);
783  ContNextScc = true;
784  } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
785  SccNumBlk = sccRemainedNumBlk;
786  It = SccBeginIter;
787  ContNextScc = false;
788  LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
789  << "sccNumIter = " << SccNumIter << '\n';);
790  } else {
791  // Finish the current scc.
792  ContNextScc = true;
793  }
794  } else {
795  // Continue on next component in the current scc.
796  ContNextScc = false;
797  }
798 
799  if (ContNextScc)
800  SccBeginMBB = nullptr;
801  } //while, "one iteration" over the function.
802 
803  MachineBasicBlock *EntryMBB =
805  if (EntryMBB->succ_empty()) {
806  Finish = true;
807  LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
808  } else {
809  int NewnumRemainedBlk
810  = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
811  // consider cloned blocks ??
812  if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
813  MakeProgress = true;
814  NumRemainedBlk = NewnumRemainedBlk;
815  } else {
816  MakeProgress = false;
817  LLVM_DEBUG(dbgs() << "No progress\n";);
818  }
819  }
820  } while (!Finish && MakeProgress);
821 
822  // Misc wrap up to maintain the consistency of the Function representation.
824 
825  // Detach retired Block, release memory.
826  for (auto &It : BlockInfoMap) {
827  if (It.second && It.second->IsRetired) {
828  assert((It.first)->getNumber() != -1);
829  LLVM_DEBUG(dbgs() << "Erase BB" << (It.first)->getNumber() << "\n";);
830  It.first->eraseFromParent(); // Remove from the parent Function.
831  }
832  delete It.second;
833  }
834  BlockInfoMap.clear();
835  LLInfoMap.clear();
836 
837  if (!Finish) {
838  LLVM_DEBUG(FuncRep->viewCFG());
839  report_fatal_error("IRREDUCIBLE_CFG");
840  }
841 
842  return true;
843 }
844 
845 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
846  int SccNum = 0;
847  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
848  ++It, ++SccNum) {
849  const std::vector<MachineBasicBlock *> &SccNext = *It;
850  for (MachineBasicBlock *MBB : SccNext) {
851  OrderedBlks.push_back(MBB);
852  recordSccnum(MBB, SccNum);
853  }
854  }
855 
856  // walk through all the block in func to check for unreachable
857  for (auto *MBB : nodes(MF)) {
858  SccNum = getSCCNum(MBB);
859  if (SccNum == INVALIDSCCNUM)
860  dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
861  }
862 }
863 
864 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
865  int NumMatch = 0;
866  int CurMatch;
867 
868  LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
869 
870  while ((CurMatch = patternMatchGroup(MBB)) > 0)
871  NumMatch += CurMatch;
872 
873  LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
874  << ", numMatch = " << NumMatch << "\n";);
875 
876  return NumMatch;
877 }
878 
879 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
880  int NumMatch = 0;
881  NumMatch += loopendPatternMatch();
882  NumMatch += serialPatternMatch(MBB);
883  NumMatch += ifPatternMatch(MBB);
884  return NumMatch;
885 }
886 
887 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
888  if (MBB->succ_size() != 1)
889  return 0;
890 
891  MachineBasicBlock *childBlk = *MBB->succ_begin();
892  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
893  return 0;
894 
895  mergeSerialBlock(MBB, childBlk);
896  ++numSerialPatternMatch;
897  return 1;
898 }
899 
900 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
901  //two edges
902  if (MBB->succ_size() != 2)
903  return 0;
904  if (hasBackEdge(MBB))
905  return 0;
906  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
907  if (!BranchMI)
908  return 0;
909 
910  assert(isCondBranch(BranchMI));
911  int NumMatch = 0;
912 
913  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
914  NumMatch += serialPatternMatch(TrueMBB);
915  NumMatch += ifPatternMatch(TrueMBB);
916  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
917  NumMatch += serialPatternMatch(FalseMBB);
918  NumMatch += ifPatternMatch(FalseMBB);
919  MachineBasicBlock *LandBlk;
920  int Cloned = 0;
921 
922  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
923  // TODO: Simplify
924  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
925  && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
926  // Diamond pattern
927  LandBlk = *TrueMBB->succ_begin();
928  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
929  // Triangle pattern, false is empty
930  LandBlk = FalseMBB;
931  FalseMBB = nullptr;
932  } else if (FalseMBB->succ_size() == 1
933  && *FalseMBB->succ_begin() == TrueMBB) {
934  // Triangle pattern, true is empty
935  // We reverse the predicate to make a triangle, empty false pattern;
936  std::swap(TrueMBB, FalseMBB);
937  reversePredicateSetter(MBB->end(), *MBB);
938  LandBlk = FalseMBB;
939  FalseMBB = nullptr;
940  } else if (FalseMBB->succ_size() == 1
941  && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
942  LandBlk = *FalseMBB->succ_begin();
943  } else if (TrueMBB->succ_size() == 1
944  && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
945  LandBlk = *TrueMBB->succ_begin();
946  } else {
947  return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
948  }
949 
950  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
951  // new BB created for landBlk==NULL may introduce new challenge to the
952  // reduction process.
953  if (LandBlk &&
954  ((TrueMBB && TrueMBB->pred_size() > 1)
955  || (FalseMBB && FalseMBB->pred_size() > 1))) {
956  Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
957  }
958 
959  if (TrueMBB && TrueMBB->pred_size() > 1) {
960  TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
961  ++Cloned;
962  }
963 
964  if (FalseMBB && FalseMBB->pred_size() > 1) {
965  FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
966  ++Cloned;
967  }
968 
969  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
970 
971  ++numIfPatternMatch;
972 
973  numClonedBlock += Cloned;
974 
975  return 1 + Cloned + NumMatch;
976 }
977 
978 int AMDGPUCFGStructurizer::loopendPatternMatch() {
979  std::deque<MachineLoop *> NestedLoops;
980  for (auto &It: *MLI)
981  for (MachineLoop *ML : depth_first(It))
982  NestedLoops.push_front(ML);
983 
984  if (NestedLoops.empty())
985  return 0;
986 
987  // Process nested loop outside->inside (we did push_front),
988  // so "continue" to a outside loop won't be mistaken as "break"
989  // of the current loop.
990  int Num = 0;
991  for (MachineLoop *ExaminedLoop : NestedLoops) {
992  if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
993  continue;
994  LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
995  int NumBreak = mergeLoop(ExaminedLoop);
996  if (NumBreak == -1)
997  break;
998  Num += NumBreak;
999  }
1000  return Num;
1001 }
1002 
1003 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1004  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1005  MBBVector ExitingMBBs;
1006  LoopRep->getExitingBlocks(ExitingMBBs);
1007  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1008  LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
1009  << " exiting blocks\n";);
1010  // We assume a single ExitBlk
1011  MBBVector ExitBlks;
1012  LoopRep->getExitBlocks(ExitBlks);
1014  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1015  ExitBlkSet.insert(ExitBlks[i]);
1016  assert(ExitBlkSet.size() == 1);
1017  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1018  assert(ExitBlk && "Loop has several exit block");
1019  MBBVector LatchBlks;
1020  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1021  if (LoopRep->contains(LB))
1022  LatchBlks.push_back(LB);
1023 
1024  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1025  mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1026  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1027  settleLoopcontBlock(LatchBlks[i], LoopHeader);
1028  int Match = 0;
1029  do {
1030  Match = 0;
1031  Match += serialPatternMatch(LoopHeader);
1032  Match += ifPatternMatch(LoopHeader);
1033  } while (Match > 0);
1034  mergeLooplandBlock(LoopHeader, ExitBlk);
1035  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1036  if (ParentLoop)
1037  MLI->changeLoopFor(LoopHeader, ParentLoop);
1038  else
1039  MLI->removeBlock(LoopHeader);
1040  Visited[LoopRep] = true;
1041  return 1;
1042 }
1043 
1044 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1045  MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1046  if (Src1MBB->succ_empty()) {
1047  MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1048  if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1049  MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1050  if (TheEntry) {
1051  LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
1052  << Src1MBB->getNumber() << " src2 = BB"
1053  << Src2MBB->getNumber() << "\n";);
1054  return true;
1055  }
1056  }
1057  }
1058  return false;
1059 }
1060 
1061 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1062  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1063  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1064  if (Num == 0) {
1065  LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
1066  << "\n";);
1067  Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1068  }
1069  return Num;
1070 }
1071 
1072 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1073  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1074  int Num = 0;
1075  MachineBasicBlock *DownBlk;
1076 
1077  //trueBlk could be the common post dominator
1078  DownBlk = TrueMBB;
1079 
1080  LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1081  << " true = BB" << TrueMBB->getNumber()
1082  << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
1083  << FalseMBB->getNumber() << "\n";);
1084 
1085  while (DownBlk) {
1086  LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););
1087 
1088  if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1089  LLVM_DEBUG(dbgs() << " working\n";);
1090 
1091  Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1092  Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1093 
1094  numClonedBlock += Num;
1095  Num += serialPatternMatch(*HeadMBB->succ_begin());
1096  Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1097  Num += ifPatternMatch(HeadMBB);
1098  assert(Num > 0);
1099 
1100  break;
1101  }
1102  LLVM_DEBUG(dbgs() << " not working\n";);
1103  DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1104  } // walk down the postDomTree
1105 
1106  return Num;
1107 }
1108 
1109 #ifndef NDEBUG
1110 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1111  MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1112  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1113  dbgs() << "head = BB" << HeadMBB->getNumber()
1114  << " size = " << HeadMBB->size();
1115  if (Detail) {
1116  dbgs() << "\n";
1117  HeadMBB->print(dbgs());
1118  dbgs() << "\n";
1119  }
1120 
1121  if (TrueMBB) {
1122  dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1123  << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1124  if (Detail) {
1125  dbgs() << "\n";
1126  TrueMBB->print(dbgs());
1127  dbgs() << "\n";
1128  }
1129  }
1130  if (FalseMBB) {
1131  dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1132  << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1133  if (Detail) {
1134  dbgs() << "\n";
1135  FalseMBB->print(dbgs());
1136  dbgs() << "\n";
1137  }
1138  }
1139  if (LandMBB) {
1140  dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1141  << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1142  if (Detail) {
1143  dbgs() << "\n";
1144  LandMBB->print(dbgs());
1145  dbgs() << "\n";
1146  }
1147  }
1148 
1149  dbgs() << "\n";
1150 }
1151 #endif
1152 
1153 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1154  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1155  MachineBasicBlock **LandMBBPtr) {
1156  bool MigrateTrue = false;
1157  bool MigrateFalse = false;
1158 
1159  MachineBasicBlock *LandBlk = *LandMBBPtr;
1160 
1161  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1162  && (!FalseMBB || FalseMBB->succ_size() <= 1));
1163 
1164  if (TrueMBB == FalseMBB)
1165  return 0;
1166 
1167  MigrateTrue = needMigrateBlock(TrueMBB);
1168  MigrateFalse = needMigrateBlock(FalseMBB);
1169 
1170  if (!MigrateTrue && !MigrateFalse)
1171  return 0;
1172 
1173  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1174  // have more than one predecessors. without doing this, its predecessor
1175  // rather than headBlk will have undefined value in initReg.
1176  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1177  MigrateTrue = true;
1178  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1179  MigrateFalse = true;
1180 
1181  LLVM_DEBUG(
1182  dbgs() << "before improveSimpleJumpintoIf: ";
1183  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1184 
1185  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1186  //
1187  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1188  // {initReg = 0; org falseBlk branch }
1189  // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1190  // => org landBlk
1191  // if landBlk->pred_size() > 2, put the about if-else inside
1192  // if (initReg !=2) {...}
1193  //
1194  // add initReg = initVal to headBlk
1195 
1196  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1197  if (!MigrateTrue || !MigrateFalse) {
1198  // XXX: We have an opportunity here to optimize the "branch into if" case
1199  // here. Branch into if looks like this:
1200  // entry
1201  // / |
1202  // diamond_head branch_from
1203  // / \ |
1204  // diamond_false diamond_true
1205  // \ /
1206  // done
1207  //
1208  // The diamond_head block begins the "if" and the diamond_true block
1209  // is the block being "branched into".
1210  //
1211  // If MigrateTrue is true, then TrueBB is the block being "branched into"
1212  // and if MigrateFalse is true, then FalseBB is the block being
1213  // "branched into"
1214  //
1215  // Here is the pseudo code for how I think the optimization should work:
1216  // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1217  // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1218  // 3. Move the branch instruction from diamond_head into its own basic
1219  // block (new_block).
1220  // 4. Add an unconditional branch from diamond_head to new_block
1221  // 5. Replace the branch instruction in branch_from with an unconditional
1222  // branch to new_block. If branch_from has multiple predecessors, then
1223  // we need to replace the True/False block in the branch
1224  // instruction instead of replacing it.
1225  // 6. Change the condition of the branch instruction in new_block from
1226  // COND to (COND || GPR0)
1227  //
1228  // In order insert these MOV instruction, we will need to use the
1229  // RegisterScavenger. Usually liveness stops being tracked during
1230  // the late machine optimization passes, however if we implement
1231  // bool TargetRegisterInfo::requiresRegisterScavenging(
1232  // const MachineFunction &MF)
1233  // and have it return true, liveness will be tracked correctly
1234  // by generic optimization passes. We will also need to make sure that
1235  // all of our target-specific passes that run after regalloc and before
1236  // the CFGStructurizer track liveness and we will need to modify this pass
1237  // to correctly track liveness.
1238  //
1239  // After the above changes, the new CFG should look like this:
1240  // entry
1241  // / |
1242  // diamond_head branch_from
1243  // \ /
1244  // new_block
1245  // / |
1246  // diamond_false diamond_true
1247  // \ /
1248  // done
1249  //
1250  // Without this optimization, we are forced to duplicate the diamond_true
1251  // block and we will end up with a CFG like this:
1252  //
1253  // entry
1254  // / |
1255  // diamond_head branch_from
1256  // / \ |
1257  // diamond_false diamond_true diamond_true (duplicate)
1258  // \ / |
1259  // done --------------------|
1260  //
1261  // Duplicating diamond_true can be very costly especially if it has a
1262  // lot of instructions.
1263  return 0;
1264  }
1265 
1266  int NumNewBlk = 0;
1267 
1268  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1269 
1270  //insert R600::ENDIF to avoid special case "input landBlk == NULL"
1271  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
1272 
1273  if (LandBlkHasOtherPred) {
1274  report_fatal_error("Extra register needed to handle CFG");
1275  Register CmpResReg =
1276  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1277  report_fatal_error("Extra compare instruction needed to handle CFG");
1278  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
1279  CmpResReg, DebugLoc());
1280  }
1281 
1282  // XXX: We are running this after RA, so creating virtual registers will
1283  // cause an assertion failure in the PostRA scheduling pass.
1284  Register InitReg =
1285  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1286  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
1287  DebugLoc());
1288 
1289  if (MigrateTrue) {
1290  migrateInstruction(TrueMBB, LandBlk, I);
1291  // need to uncondionally insert the assignment to ensure a path from its
1292  // predecessor rather than headBlk has valid value in initReg if
1293  // (initVal != 1).
1294  report_fatal_error("Extra register needed to handle CFG");
1295  }
1296  insertInstrBefore(I, R600::ELSE);
1297 
1298  if (MigrateFalse) {
1299  migrateInstruction(FalseMBB, LandBlk, I);
1300  // need to uncondionally insert the assignment to ensure a path from its
1301  // predecessor rather than headBlk has valid value in initReg if
1302  // (initVal != 0)
1303  report_fatal_error("Extra register needed to handle CFG");
1304  }
1305 
1306  if (LandBlkHasOtherPred) {
1307  // add endif
1308  insertInstrBefore(I, R600::ENDIF);
1309 
1310  // put initReg = 2 to other predecessors of landBlk
1311  for (MachineBasicBlock *MBB : LandBlk->predecessors())
1312  if (MBB != TrueMBB && MBB != FalseMBB)
1313  report_fatal_error("Extra register needed to handle CFG");
1314  }
1315  LLVM_DEBUG(
1316  dbgs() << "result from improveSimpleJumpintoIf: ";
1317  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1318 
1319  // update landBlk
1320  *LandMBBPtr = LandBlk;
1321 
1322  return NumNewBlk;
1323 }
1324 
1325 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1326  MachineBasicBlock *SrcMBB) {
1327  LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
1328  << SrcMBB->getNumber() << "\n";);
1329  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1330 
1331  DstMBB->removeSuccessor(SrcMBB, true);
1332  cloneSuccessorList(DstMBB, SrcMBB);
1333 
1334  removeSuccessor(SrcMBB);
1335  MLI->removeBlock(SrcMBB);
1336  retireBlock(SrcMBB);
1337 }
1338 
1339 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1341  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1342  assert (TrueMBB);
1343  LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ ";
1344  if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
1345  << " } else ";
1346  dbgs() << "{ "; if (FalseMBB) {
1347  dbgs() << "BB" << FalseMBB->getNumber();
1348  } dbgs() << " }\n ";
1349  dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
1350  dbgs() << "BB" << LandMBB->getNumber();
1351  } dbgs() << "\n";);
1352 
1353  int OldOpcode = BranchMI->getOpcode();
1354  DebugLoc BranchDL = BranchMI->getDebugLoc();
1355 
1356 // transform to
1357 // if cond
1358 // trueBlk
1359 // else
1360 // falseBlk
1361 // endif
1362 // landBlk
1363 
1364  MachineBasicBlock::iterator I = BranchMI;
1365  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1366  BranchDL);
1367 
1368  if (TrueMBB) {
1369  MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1370  MBB->removeSuccessor(TrueMBB, true);
1371  if (LandMBB && TrueMBB->succ_size()!=0)
1372  TrueMBB->removeSuccessor(LandMBB, true);
1373  retireBlock(TrueMBB);
1374  MLI->removeBlock(TrueMBB);
1375  }
1376 
1377  if (FalseMBB) {
1378  insertInstrBefore(I, R600::ELSE);
1379  MBB->splice(I, FalseMBB, FalseMBB->begin(),
1380  FalseMBB->end());
1381  MBB->removeSuccessor(FalseMBB, true);
1382  if (LandMBB && !FalseMBB->succ_empty())
1383  FalseMBB->removeSuccessor(LandMBB, true);
1384  retireBlock(FalseMBB);
1385  MLI->removeBlock(FalseMBB);
1386  }
1387  insertInstrBefore(I, R600::ENDIF);
1388 
1389  BranchMI->eraseFromParent();
1390 
1391  if (LandMBB && TrueMBB && FalseMBB)
1392  MBB->addSuccessor(LandMBB);
1393 }
1394 
1395 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1396  MachineBasicBlock *LandMBB) {
1397  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1398  << " land = BB" << LandMBB->getNumber() << "\n";);
1399 
1400  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
1401  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
1402  DstBlk->replaceSuccessor(DstBlk, LandMBB);
1403 }
1404 
1405 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1406  MachineBasicBlock *LandMBB) {
1407  LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
1408  << ExitingMBB->getNumber() << " land = BB"
1409  << LandMBB->getNumber() << "\n";);
1410  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1411  assert(BranchMI && isCondBranch(BranchMI));
1412  DebugLoc DL = BranchMI->getDebugLoc();
1413  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1414  MachineBasicBlock::iterator I = BranchMI;
1415  if (TrueBranch != LandMBB)
1416  reversePredicateSetter(I, *I->getParent());
1417  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
1418  insertInstrBefore(I, R600::BREAK);
1419  insertInstrBefore(I, R600::ENDIF);
1420  //now branchInst can be erase safely
1421  BranchMI->eraseFromParent();
1422  //now take care of successors, retire blocks
1423  ExitingMBB->removeSuccessor(LandMBB, true);
1424 }
1425 
1426 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1427  MachineBasicBlock *ContMBB) {
1428  LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1429  << ContingMBB->getNumber() << ", cont = BB"
1430  << ContMBB->getNumber() << "\n";);
1431 
1432  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1433  if (MI) {
1434  assert(isCondBranch(MI));
1436  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1437  int OldOpcode = MI->getOpcode();
1438  DebugLoc DL = MI->getDebugLoc();
1439 
1440  bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1441 
1442  if (!UseContinueLogical) {
1443  int BranchOpcode =
1444  TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1445  getBranchZeroOpcode(OldOpcode);
1446  insertCondBranchBefore(I, BranchOpcode, DL);
1447  // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1448  insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
1449  insertInstrEnd(ContingMBB, R600::ENDIF, DL);
1450  } else {
1451  int BranchOpcode =
1452  TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1453  getContinueZeroOpcode(OldOpcode);
1454  insertCondBranchBefore(I, BranchOpcode, DL);
1455  }
1456 
1457  MI->eraseFromParent();
1458  } else {
1459  // if we've arrived here then we've already erased the branch instruction
1460  // travel back up the basic block to see the last reference of our debug
1461  // location we've just inserted that reference here so it should be
1462  // representative insertEnd to ensure phi-moves, if exist, go before the
1463  // continue-instr.
1464  insertInstrEnd(ContingMBB, R600::CONTINUE,
1465  getLastDebugLocInBB(ContingMBB));
1466  }
1467 }
1468 
1469 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1470  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1471  int Cloned = 0;
1472  assert(PreMBB->isSuccessor(SrcMBB));
1473  while (SrcMBB && SrcMBB != DstMBB) {
1474  assert(SrcMBB->succ_size() == 1);
1475  if (SrcMBB->pred_size() > 1) {
1476  SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1477  ++Cloned;
1478  }
1479 
1480  PreMBB = SrcMBB;
1481  SrcMBB = *SrcMBB->succ_begin();
1482  }
1483 
1484  return Cloned;
1485 }
1486 
1488 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1489  MachineBasicBlock *PredMBB) {
1490  assert(PredMBB->isSuccessor(MBB) &&
1491  "succBlk is not a prececessor of curBlk");
1492 
1493  MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
1494  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1495  //srcBlk, oldBlk, newBlk
1496 
1497  PredMBB->replaceSuccessor(MBB, CloneMBB);
1498 
1499  // add all successor to cloneBlk
1500  cloneSuccessorList(CloneMBB, MBB);
1501 
1502  numClonedInstr += MBB->size();
1503 
1504  LLVM_DEBUG(dbgs() << "Cloned block: "
1505  << "BB" << MBB->getNumber() << "size " << MBB->size()
1506  << "\n";);
1507 
1508  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1509 
1510  return CloneMBB;
1511 }
1512 
1513 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1515  MachineBasicBlock::iterator SpliceEnd;
1516  //look for the input branchinstr, not the AMDGPU branchinstr
1517  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1518  if (!BranchMI) {
1519  LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
1520  SpliceEnd = SrcMBB->end();
1521  } else {
1522  LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1523  SpliceEnd = BranchMI;
1524  }
1525  LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
1526  << DstMBB->size() << "srcSize = " << SrcMBB->size()
1527  << "\n";);
1528 
1529  //splice insert before insertPos
1530  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1531 
1532  LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
1533  << DstMBB->size() << "srcSize = " << SrcMBB->size()
1534  << '\n';);
1535 }
1536 
1538 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1539  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1540  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1541 
1542  if (!LoopHeader || !LoopLatch)
1543  return nullptr;
1544  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1545  // Is LoopRep an infinite loop ?
1546  if (!BranchMI || !isUncondBranch(BranchMI))
1547  return nullptr;
1548 
1549  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1550  FuncRep->push_back(DummyExitBlk); //insert to function
1551  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1552  LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1553  LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
1554  Ctx.emitError("Extra register needed to handle CFG");
1555  return nullptr;
1556 }
1557 
1558 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1559  MachineInstr *BranchMI;
1560 
1561  // I saw two unconditional branch in one basic block in example
1562  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1563  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1564  && isUncondBranch(BranchMI)) {
1565  LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1566  BranchMI->eraseFromParent();
1567  }
1568 }
1569 
1570 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1572  if (MBB->succ_size() != 2)
1573  return;
1574  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1575  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1576  if (MBB1 != MBB2)
1577  return;
1578 
1579  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1580  assert(BranchMI && isCondBranch(BranchMI));
1581  LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1582  BranchMI->eraseFromParent();
1583  SHOWNEWBLK(MBB1, "Removing redundant successor");
1584  MBB->removeSuccessor(MBB1, true);
1585 }
1586 
1587 void AMDGPUCFGStructurizer::addDummyExitBlock(
1589  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1590  FuncRep->push_back(DummyExitBlk); //insert to function
1591  insertInstrEnd(DummyExitBlk, R600::RETURN);
1592 
1593  for (MachineBasicBlock *MBB : RetMBB) {
1594  if (MachineInstr *MI = getReturnInstr(MBB))
1595  MI->eraseFromParent();
1596  MBB->addSuccessor(DummyExitBlk);
1597  LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1598  << " successors\n";);
1599  }
1600  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1601 }
1602 
1603 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1604  while (MBB->succ_size())
1606 }
1607 
1608 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1609  int SccNum) {
1610  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1611  if (!srcBlkInfo)
1612  srcBlkInfo = new BlockInformation();
1613  srcBlkInfo->SccNum = SccNum;
1614 }
1615 
1616 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1617  LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
1618 
1619  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1620 
1621  if (!SrcBlkInfo)
1622  SrcBlkInfo = new BlockInformation();
1623 
1624  SrcBlkInfo->IsRetired = true;
1625  assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet");
1626 }
1627 
1628 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1629  "AMDGPU CFG Structurizer", false, false)
1633 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1635 
1637  return new AMDGPUCFGStructurizer();
1638 }
i
i
Definition: README.txt:29
SHOWNEWINSTR
#define SHOWNEWINSTR(i)
Definition: AMDILCFGStructurizer.cpp:57
llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition: MachineBasicBlock.h:348
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::LLVMContext::emitError
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Definition: LLVMContext.cpp:251
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::MachineLoopInfo::getLoopFor
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Definition: MachineLoopInfo.h:127
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
SCCIterator.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1175
Statistic.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::MachineFunction::CreateMachineInstr
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
Definition: MachineFunction.cpp:354
MachineJumpTableInfo.h
llvm::AMDGPUISD::ELSE
@ ELSE
Definition: AMDGPUISelLowering.h:358
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::DiagnosticPredicateTy::Match
@ Match
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1564
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::R600RegisterInfo
Definition: R600RegisterInfo.h:22
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:750
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:860
R600.h
llvm::MachineBasicBlock::pred_size
unsigned pred_size() const
Definition: MachineBasicBlock.h:332
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:651
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::MachineJumpTableInfo::isEmpty
bool isEmpty() const
isEmpty - Return true if there are no jump tables.
Definition: MachineJumpTableInfo.h:97
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:912
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:732
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:180
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::MachineBasicBlock::rend
reverse_iterator rend()
Definition: MachineBasicBlock.h:282
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
SHOWNEWBLK
#define SHOWNEWBLK(b, msg)
Definition: AMDILCFGStructurizer.cpp:59
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", "AMDGPU CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUCFGStructurizer
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:232
llvm::MachineFunctionProperties::Property::FailsVerification
@ FailsVerification
llvm::MachineBasicBlock::succ_iterator
std::vector< MachineBasicBlock * >::iterator succ_iterator
Definition: MachineBasicBlock.h:310
R600MCTargetDesc.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
blk
static uint32_t blk(uint32_t *Buf, int I)
Definition: SHA1.cpp:36
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::R600Subtarget
Definition: R600Subtarget.h:29
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:46
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::nodes
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
Definition: GraphTraits.h:108
llvm::MachineFunction::dump
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
Definition: MachineFunction.cpp:541
I
#define I(x, y, z)
Definition: MD5.cpp:58
MachineFunctionPass.h
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::SmallVectorImpl::const_iterator
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:557
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:243
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:336
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:852
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:229
llvm::initializeAMDGPUCFGStructurizerPass
void initializeAMDGPUCFGStructurizerPass(PassRegistry &)
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
MachinePostDominators.h
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:353
amdgpustructurizer
amdgpustructurizer
Definition: AMDILCFGStructurizer.cpp:1633
llvm::MachineBasicBlock::pred_empty
bool pred_empty() const
Definition: MachineBasicBlock.h:335
R600RegisterInfo.h
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::MachineBasicBlock::succ_empty
bool succ_empty() const
Definition: MachineBasicBlock.h:351
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1073
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:359
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:276
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:967
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::MachinePostDominatorTree
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
Definition: MachinePostDominators.h:27
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
CFG
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to CFG
Definition: README.txt:39
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:811
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:607
llvm::MachineBasicBlock::removeSuccessor
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:788
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1311
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
R600Subtarget.h
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
INVALIDSCCNUM
#define INVALIDSCCNUM
Definition: AMDILCFGStructurizer.cpp:70
llvm::R600InstrInfo
Definition: R600InstrInfo.h:38
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:272
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:244
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::MachineFunction::getJumpTableInfo
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Definition: MachineFunction.h:664
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::GraphTraits
Definition: GraphTraits.h:35
llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:332
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:46
llvm::createAMDGPUCFGStructurizerPass
FunctionPass * createAMDGPUCFGStructurizerPass()
Definition: AMDILCFGStructurizer.cpp:1636
MachineFunction.h
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:680
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:274
Structurizer
AMDGPU CFG Structurizer
Definition: AMDILCFGStructurizer.cpp:1634
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38