Line data Source code
1 : //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //==-----------------------------------------------------------------------===//
9 :
10 : #include "AMDGPU.h"
11 : #include "AMDGPUSubtarget.h"
12 : #include "R600InstrInfo.h"
13 : #include "R600RegisterInfo.h"
14 : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15 : #include "llvm/ADT/DepthFirstIterator.h"
16 : #include "llvm/ADT/SCCIterator.h"
17 : #include "llvm/ADT/SmallPtrSet.h"
18 : #include "llvm/ADT/SmallVector.h"
19 : #include "llvm/ADT/Statistic.h"
20 : #include "llvm/ADT/StringRef.h"
21 : #include "llvm/CodeGen/MachineBasicBlock.h"
22 : #include "llvm/CodeGen/MachineDominators.h"
23 : #include "llvm/CodeGen/MachineFunction.h"
24 : #include "llvm/CodeGen/MachineFunctionPass.h"
25 : #include "llvm/CodeGen/MachineInstr.h"
26 : #include "llvm/CodeGen/MachineInstrBuilder.h"
27 : #include "llvm/CodeGen/MachineJumpTableInfo.h"
28 : #include "llvm/CodeGen/MachineLoopInfo.h"
29 : #include "llvm/CodeGen/MachineOperand.h"
30 : #include "llvm/CodeGen/MachinePostDominators.h"
31 : #include "llvm/CodeGen/MachineRegisterInfo.h"
32 : #include "llvm/IR/DebugLoc.h"
33 : #include "llvm/IR/LLVMContext.h"
34 : #include "llvm/Pass.h"
35 : #include "llvm/Support/Debug.h"
36 : #include "llvm/Support/ErrorHandling.h"
37 : #include "llvm/Support/MachineValueType.h"
38 : #include "llvm/Support/raw_ostream.h"
39 : #include <cassert>
40 : #include <cstddef>
41 : #include <deque>
42 : #include <iterator>
43 : #include <map>
44 : #include <utility>
45 : #include <vector>
46 :
47 : using namespace llvm;
48 :
49 : #define DEBUG_TYPE "structcfg"
50 :
51 : #define DEFAULT_VEC_SLOTS 8
52 :
53 : // TODO: move-begin.
54 :
55 : //===----------------------------------------------------------------------===//
56 : //
57 : // Statistics for CFGStructurizer.
58 : //
59 : //===----------------------------------------------------------------------===//
60 :
61 : STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
62 : "matched");
63 : STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
64 : "matched");
65 : STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
66 : STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
67 :
68 : namespace llvm {
69 :
70 : void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
71 :
72 : } // end namespace llvm
73 :
74 : namespace {
75 :
76 : //===----------------------------------------------------------------------===//
77 : //
78 : // Miscellaneous utility for CFGStructurizer.
79 : //
80 : //===----------------------------------------------------------------------===//
81 :
82 : #define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
83 :
84 : #define SHOWNEWBLK(b, msg) \
85 : LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
86 : dbgs() << "\n";);
87 :
88 : #define SHOWBLK_DETAIL(b, msg) \
89 : LLVM_DEBUG(if (b) { \
90 : dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
91 : b->print(dbgs()); \
92 : dbgs() << "\n"; \
93 : });
94 :
95 : #define INVALIDSCCNUM -1
96 :
97 : //===----------------------------------------------------------------------===//
98 : //
99 : // supporting data structure for CFGStructurizer
100 : //
101 : //===----------------------------------------------------------------------===//
102 :
103 : class BlockInformation {
104 : public:
105 : bool IsRetired = false;
106 : int SccNum = INVALIDSCCNUM;
107 :
108 2424 : BlockInformation() = default;
109 : };
110 :
111 : //===----------------------------------------------------------------------===//
112 : //
113 : // CFGStructurizer
114 : //
115 : //===----------------------------------------------------------------------===//
116 :
117 : class AMDGPUCFGStructurizer : public MachineFunctionPass {
118 : public:
119 : using MBBVector = SmallVector<MachineBasicBlock *, 32>;
120 : using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
121 : using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
122 :
123 : enum PathToKind {
124 : Not_SinglePath = 0,
125 : SinglePath_InPath = 1,
126 : SinglePath_NotInPath = 2
127 : };
128 :
129 : static char ID;
130 :
131 282 : AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
132 282 : initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
133 282 : }
134 :
135 282 : StringRef getPassName() const override {
136 282 : return "AMDGPU Control Flow Graph structurizer Pass";
137 : }
138 :
139 282 : void getAnalysisUsage(AnalysisUsage &AU) const override {
140 : AU.addRequired<MachineDominatorTree>();
141 : AU.addRequired<MachinePostDominatorTree>();
142 : AU.addRequired<MachineLoopInfo>();
143 282 : MachineFunctionPass::getAnalysisUsage(AU);
144 282 : }
145 :
146 : /// Perform the CFG structurization
147 : bool run();
148 :
149 : /// Perform the CFG preparation
150 : /// This step will remove every unconditionnal/dead jump instructions and make
151 : /// sure all loops have an exit block
152 : bool prepare();
153 :
154 2297 : bool runOnMachineFunction(MachineFunction &MF) override {
155 4594 : TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
156 2297 : TRI = &TII->getRegisterInfo();
157 : LLVM_DEBUG(MF.dump(););
158 : OrderedBlks.clear();
159 : Visited.clear();
160 2297 : FuncRep = &MF;
161 2297 : MLI = &getAnalysis<MachineLoopInfo>();
162 : LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
163 2297 : MDT = &getAnalysis<MachineDominatorTree>();
164 : LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
165 2297 : PDT = &getAnalysis<MachinePostDominatorTree>();
166 : LLVM_DEBUG(PDT->print(dbgs()););
167 2297 : prepare();
168 2297 : run();
169 : LLVM_DEBUG(MF.dump(););
170 2297 : return true;
171 : }
172 :
173 : protected:
174 : MachineDominatorTree *MDT;
175 : MachinePostDominatorTree *PDT;
176 : MachineLoopInfo *MLI;
177 : const R600InstrInfo *TII = nullptr;
178 : const R600RegisterInfo *TRI = nullptr;
179 :
180 : // PRINT FUNCTIONS
181 : /// Print the ordered Blocks.
182 : void printOrderedBlocks() const {
183 : size_t i = 0;
184 : for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
185 : iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
186 : dbgs() << "BB" << (*iterBlk)->getNumber();
187 : dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
188 : if (i != 0 && i % 10 == 0) {
189 : dbgs() << "\n";
190 : } else {
191 : dbgs() << " ";
192 : }
193 : }
194 : }
195 :
196 : static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
197 : for (MachineLoop::iterator iter = LoopInfo.begin(),
198 : iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
199 : (*iter)->print(dbgs(), 0);
200 : }
201 : }
202 :
203 : // UTILITY FUNCTIONS
204 : int getSCCNum(MachineBasicBlock *MBB) const;
205 : MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
206 : bool hasBackEdge(MachineBasicBlock *MBB) const;
207 : bool isRetiredBlock(MachineBasicBlock *MBB) const;
208 : bool isActiveLoophead(MachineBasicBlock *MBB) const;
209 : PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
210 : bool AllowSideEntry = true) const;
211 : int countActiveBlock(MBBVector::const_iterator It,
212 : MBBVector::const_iterator E) const;
213 : bool needMigrateBlock(MachineBasicBlock *MBB) const;
214 :
215 : // Utility Functions
216 : void reversePredicateSetter(MachineBasicBlock::iterator I,
217 : MachineBasicBlock &MBB);
218 : /// Compute the reversed DFS post order of Blocks
219 : void orderBlocks(MachineFunction *MF);
220 :
221 : // Function originally from CFGStructTraits
222 : void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
223 : const DebugLoc &DL = DebugLoc());
224 : MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
225 : const DebugLoc &DL = DebugLoc());
226 : MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
227 : void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
228 : const DebugLoc &DL);
229 : void insertCondBranchBefore(MachineBasicBlock *MBB,
230 : MachineBasicBlock::iterator I, int NewOpcode,
231 : int RegNum, const DebugLoc &DL);
232 :
233 : static int getBranchNzeroOpcode(int OldOpcode);
234 : static int getBranchZeroOpcode(int OldOpcode);
235 : static int getContinueNzeroOpcode(int OldOpcode);
236 : static int getContinueZeroOpcode(int OldOpcode);
237 : static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
238 : static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
239 : static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
240 : MachineInstr *MI);
241 : static bool isCondBranch(MachineInstr *MI);
242 : static bool isUncondBranch(MachineInstr *MI);
243 : static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
244 : static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
245 :
246 : /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
247 : ///
248 : /// BB with backward-edge could have move instructions after the branch
249 : /// instruction. Such move instruction "belong to" the loop backward-edge.
250 : MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
251 :
252 : static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
253 : static bool isReturnBlock(MachineBasicBlock *MBB);
254 : static void cloneSuccessorList(MachineBasicBlock *DstMBB,
255 : MachineBasicBlock *SrcMBB);
256 : static MachineBasicBlock *clone(MachineBasicBlock *MBB);
257 :
258 : /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
259 : /// because the AMDGPU instruction is not recognized as terminator fix this
260 : /// and retire this routine
261 : void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
262 : MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
263 :
264 : static void wrapup(MachineBasicBlock *MBB);
265 :
266 : int patternMatch(MachineBasicBlock *MBB);
267 : int patternMatchGroup(MachineBasicBlock *MBB);
268 : int serialPatternMatch(MachineBasicBlock *MBB);
269 : int ifPatternMatch(MachineBasicBlock *MBB);
270 : int loopendPatternMatch();
271 : int mergeLoop(MachineLoop *LoopRep);
272 :
273 : /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
274 : /// the same loop with LoopLandInfo without explicitly keeping track of
275 : /// loopContBlks and loopBreakBlks, this is a method to get the information.
276 : bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
277 : MachineBasicBlock *Src2MBB);
278 : int handleJumpintoIf(MachineBasicBlock *HeadMBB,
279 : MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
280 : int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
281 : MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
282 : int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
283 : MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
284 : MachineBasicBlock **LandMBBPtr);
285 : void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
286 : MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
287 : MachineBasicBlock *LandMBB, bool Detail = false);
288 : int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
289 : MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
290 : void mergeSerialBlock(MachineBasicBlock *DstMBB,
291 : MachineBasicBlock *SrcMBB);
292 :
293 : void mergeIfthenelseBlock(MachineInstr *BranchMI,
294 : MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
295 : MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
296 : void mergeLooplandBlock(MachineBasicBlock *DstMBB,
297 : MachineBasicBlock *LandMBB);
298 : void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
299 : MachineBasicBlock *LandMBB);
300 : void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
301 : MachineBasicBlock *ContMBB);
302 :
303 : /// normalizeInfiniteLoopExit change
304 : /// B1:
305 : /// uncond_br LoopHeader
306 : ///
307 : /// to
308 : /// B1:
309 : /// cond_br 1 LoopHeader dummyExit
310 : /// and return the newly added dummy exit block
311 : MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
312 : void removeUnconditionalBranch(MachineBasicBlock *MBB);
313 :
314 : /// Remove duplicate branches instructions in a block.
315 : /// For instance
316 : /// B0:
317 : /// cond_br X B1 B2
318 : /// cond_br X B1 B2
319 : /// is transformed to
320 : /// B0:
321 : /// cond_br X B1 B2
322 : void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
323 :
324 : void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
325 : void removeSuccessor(MachineBasicBlock *MBB);
326 : MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
327 : MachineBasicBlock *PredMBB);
328 : void migrateInstruction(MachineBasicBlock *SrcMBB,
329 : MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
330 : void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
331 : void retireBlock(MachineBasicBlock *MBB);
332 :
333 : private:
334 : MBBInfoMap BlockInfoMap;
335 : LoopLandInfoMap LLInfoMap;
336 : std::map<MachineLoop *, bool> Visited;
337 : MachineFunction *FuncRep;
338 : SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
339 : };
340 :
341 : } // end anonymous namespace
342 :
343 : char AMDGPUCFGStructurizer::ID = 0;
344 :
345 : int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
346 : MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
347 2672 : if (It == BlockInfoMap.end())
348 : return INVALIDSCCNUM;
349 250 : return (*It).second->SccNum;
350 : }
351 :
352 : MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
353 : const {
354 : LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
355 0 : if (It == LLInfoMap.end())
356 : return nullptr;
357 0 : return (*It).second;
358 : }
359 :
360 0 : bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
361 0 : MachineLoop *LoopRep = MLI->getLoopFor(MBB);
362 0 : if (!LoopRep)
363 0 : return false;
364 : MachineBasicBlock *LoopHeader = LoopRep->getHeader();
365 0 : return MBB->isSuccessor(LoopHeader);
366 : }
367 :
368 : bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
369 : MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
370 7266 : if (It == BlockInfoMap.end())
371 : return false;
372 7266 : return (*It).second->IsRetired;
373 : }
374 :
375 80 : bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
376 80 : MachineLoop *LoopRep = MLI->getLoopFor(MBB);
377 80 : while (LoopRep && LoopRep->getHeader() == MBB) {
378 0 : MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
379 0 : if(!LoopLand)
380 0 : return true;
381 0 : if (!isRetiredBlock(LoopLand))
382 0 : return true;
383 0 : LoopRep = LoopRep->getParentLoop();
384 : }
385 : return false;
386 : }
387 :
388 4 : AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
389 : MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
390 : bool AllowSideEntry) const {
391 : assert(DstMBB);
392 4 : if (SrcMBB == DstMBB)
393 : return SinglePath_InPath;
394 10 : while (SrcMBB && SrcMBB->succ_size() == 1) {
395 10 : SrcMBB = *SrcMBB->succ_begin();
396 10 : if (SrcMBB == DstMBB)
397 : return SinglePath_InPath;
398 6 : if (!AllowSideEntry && SrcMBB->pred_size() > 1)
399 : return Not_SinglePath;
400 : }
401 0 : if (SrcMBB && SrcMBB->succ_size()==0)
402 0 : return SinglePath_NotInPath;
403 : return Not_SinglePath;
404 : }
405 :
406 4699 : int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
407 : MBBVector::const_iterator E) const {
408 : int Count = 0;
409 9543 : while (It != E) {
410 9688 : if (!isRetiredBlock(*It))
411 4824 : ++Count;
412 4844 : ++It;
413 : }
414 4699 : return Count;
415 : }
416 :
417 0 : bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
418 : unsigned BlockSizeThreshold = 30;
419 : unsigned CloneInstrThreshold = 100;
420 0 : bool MultiplePreds = MBB && (MBB->pred_size() > 1);
421 :
422 : if(!MultiplePreds)
423 0 : return false;
424 : unsigned BlkSize = MBB->size();
425 0 : return ((BlkSize > BlockSizeThreshold) &&
426 0 : (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
427 : }
428 :
429 0 : void AMDGPUCFGStructurizer::reversePredicateSetter(
430 : MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
431 : assert(I.isValid() && "Expected valid iterator");
432 : for (;; --I) {
433 0 : if (I == MBB.end())
434 0 : continue;
435 0 : if (I->getOpcode() == R600::PRED_X) {
436 0 : switch (I->getOperand(2).getImm()) {
437 : case R600::PRED_SETE_INT:
438 : I->getOperand(2).setImm(R600::PRED_SETNE_INT);
439 0 : return;
440 : case R600::PRED_SETNE_INT:
441 : I->getOperand(2).setImm(R600::PRED_SETE_INT);
442 0 : return;
443 : case R600::PRED_SETE:
444 : I->getOperand(2).setImm(R600::PRED_SETNE);
445 0 : return;
446 : case R600::PRED_SETNE:
447 : I->getOperand(2).setImm(R600::PRED_SETE);
448 0 : return;
449 0 : default:
450 0 : llvm_unreachable("PRED_X Opcode invalid!");
451 : }
452 : }
453 : }
454 : }
455 :
456 0 : void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
457 : int NewOpcode, const DebugLoc &DL) {
458 : MachineInstr *MI =
459 0 : MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
460 : MBB->push_back(MI);
461 : //assume the instruction doesn't take any reg operand ...
462 : SHOWNEWINSTR(MI);
463 0 : }
464 :
465 0 : MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
466 : int NewOpcode,
467 : const DebugLoc &DL) {
468 : MachineInstr *MI =
469 0 : MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
470 0 : if (MBB->begin() != MBB->end())
471 : MBB->insert(MBB->begin(), MI);
472 : else
473 : MBB->push_back(MI);
474 : SHOWNEWINSTR(MI);
475 0 : return MI;
476 : }
477 :
478 0 : MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
479 : MachineBasicBlock::iterator I, int NewOpcode) {
480 : MachineInstr *OldMI = &(*I);
481 0 : MachineBasicBlock *MBB = OldMI->getParent();
482 : MachineInstr *NewMBB =
483 0 : MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
484 : MBB->insert(I, NewMBB);
485 : //assume the instruction doesn't take any reg operand ...
486 : SHOWNEWINSTR(NewMBB);
487 0 : return NewMBB;
488 : }
489 :
490 0 : void AMDGPUCFGStructurizer::insertCondBranchBefore(
491 : MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
492 : MachineInstr *OldMI = &(*I);
493 0 : MachineBasicBlock *MBB = OldMI->getParent();
494 0 : MachineFunction *MF = MBB->getParent();
495 0 : MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
496 : MBB->insert(I, NewMI);
497 : MachineInstrBuilder MIB(*MF, NewMI);
498 0 : MIB.addReg(OldMI->getOperand(1).getReg(), false);
499 : SHOWNEWINSTR(NewMI);
500 : //erase later oldInstr->eraseFromParent();
501 0 : }
502 :
503 0 : void AMDGPUCFGStructurizer::insertCondBranchBefore(
504 : MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
505 : int RegNum, const DebugLoc &DL) {
506 0 : MachineFunction *MF = blk->getParent();
507 0 : MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
508 : //insert before
509 : blk->insert(I, NewInstr);
510 0 : MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
511 : SHOWNEWINSTR(NewInstr);
512 0 : }
513 :
514 : int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
515 44 : switch(OldOpcode) {
516 : case R600::JUMP_COND:
517 : case R600::JUMP: return R600::IF_PREDICATE_SET;
518 0 : case R600::BRANCH_COND_i32:
519 : case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
520 0 : default: llvm_unreachable("internal error");
521 : }
522 : return -1;
523 : }
524 :
525 : int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
526 0 : switch(OldOpcode) {
527 : case R600::JUMP_COND:
528 : case R600::JUMP: return R600::IF_PREDICATE_SET;
529 0 : case R600::BRANCH_COND_i32:
530 : case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
531 0 : default: llvm_unreachable("internal error");
532 : }
533 : return -1;
534 : }
535 :
536 : int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
537 0 : switch(OldOpcode) {
538 : case R600::JUMP_COND:
539 : case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
540 0 : default: llvm_unreachable("internal error");
541 : }
542 : return -1;
543 : }
544 :
545 : int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
546 0 : switch(OldOpcode) {
547 : case R600::JUMP_COND:
548 : case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
549 0 : default: llvm_unreachable("internal error");
550 : }
551 : return -1;
552 : }
553 :
554 : MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
555 115 : return MI->getOperand(0).getMBB();
556 : }
557 :
558 : void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
559 : MachineBasicBlock *MBB) {
560 : MI->getOperand(0).setMBB(MBB);
561 : }
562 :
563 : MachineBasicBlock *
564 : AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
565 : MachineInstr *MI) {
566 : assert(MBB->succ_size() == 2);
567 : MachineBasicBlock *TrueBranch = getTrueBranch(MI);
568 : MachineBasicBlock::succ_iterator It = MBB->succ_begin();
569 : MachineBasicBlock::succ_iterator Next = It;
570 : ++Next;
571 48 : return (*It == TrueBranch) ? *Next : *It;
572 : }
573 :
574 : bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
575 49 : switch (MI->getOpcode()) {
576 : case R600::JUMP_COND:
577 : case R600::BRANCH_COND_i32:
578 : case R600::BRANCH_COND_f32: return true;
579 : default:
580 : return false;
581 : }
582 : return false;
583 : }
584 :
585 : bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
586 126 : switch (MI->getOpcode()) {
587 : case R600::JUMP:
588 : case R600::BRANCH:
589 : return true;
590 : default:
591 : return false;
592 : }
593 : return false;
594 : }
595 :
596 18 : DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
597 : //get DebugLoc from the first MachineBasicBlock instruction with debug info
598 18 : DebugLoc DL;
599 245 : for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
600 : ++It) {
601 : MachineInstr *instr = &(*It);
602 227 : if (instr->getDebugLoc())
603 : DL = instr->getDebugLoc();
604 : }
605 18 : return DL;
606 : }
607 :
608 48 : MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
609 : MachineBasicBlock *MBB) {
610 : MachineBasicBlock::reverse_iterator It = MBB->rbegin();
611 : MachineInstr *MI = &*It;
612 48 : if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
613 48 : return MI;
614 : return nullptr;
615 : }
616 :
617 0 : MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
618 : MachineBasicBlock *MBB) {
619 : for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
620 0 : It != E; ++It) {
621 : // FIXME: Simplify
622 : MachineInstr *MI = &*It;
623 0 : if (MI) {
624 : if (isCondBranch(MI) || isUncondBranch(MI))
625 0 : return MI;
626 0 : else if (!TII->isMov(MI->getOpcode()))
627 : break;
628 : }
629 : }
630 : return nullptr;
631 : }
632 :
633 2424 : MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
634 : MachineBasicBlock::reverse_iterator It = MBB->rbegin();
635 2424 : if (It != MBB->rend()) {
636 : MachineInstr *instr = &(*It);
637 4840 : if (instr->getOpcode() == R600::RETURN)
638 2296 : return instr;
639 : }
640 : return nullptr;
641 : }
642 :
643 : bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
644 2422 : MachineInstr *MI = getReturnInstr(MBB);
645 : bool IsReturn = (MBB->succ_size() == 0);
646 : if (MI)
647 : assert(IsReturn);
648 : else if (IsReturn)
649 : LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
650 : << " is return block without RETURN instr\n";);
651 : return IsReturn;
652 : }
653 :
654 : void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
655 : MachineBasicBlock *SrcMBB) {
656 : for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
657 101 : iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
658 20 : DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
659 : }
660 :
661 1 : MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
662 1 : MachineFunction *Func = MBB->getParent();
663 1 : MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
664 : Func->push_back(NewMBB); //insert to function
665 131 : for (const MachineInstr &It : *MBB)
666 130 : NewMBB->push_back(Func->CloneMachineInstr(&It));
667 1 : return NewMBB;
668 : }
669 :
670 1 : void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
671 : MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
672 : MachineBasicBlock *NewBlk) {
673 1 : MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
674 2 : if (BranchMI && isCondBranch(BranchMI) &&
675 : getTrueBranch(BranchMI) == OldMBB)
676 : setTrueBranch(BranchMI, NewBlk);
677 1 : }
678 :
679 2297 : void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
680 : assert((!MBB->getParent()->getJumpTableInfo()
681 : || MBB->getParent()->getJumpTableInfo()->isEmpty())
682 : && "found a jump table");
683 :
684 : //collect continue right before endloop
685 : SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
686 : MachineBasicBlock::iterator Pre = MBB->begin();
687 : MachineBasicBlock::iterator E = MBB->end();
688 : MachineBasicBlock::iterator It = Pre;
689 63055 : while (It != E) {
690 60758 : if (Pre->getOpcode() == R600::CONTINUE
691 60758 : && It->getOpcode() == R600::ENDLOOP)
692 18 : ContInstr.push_back(&*Pre);
693 : Pre = It;
694 : ++It;
695 : }
696 :
697 : //delete continue right before endloop
698 2315 : for (unsigned i = 0; i < ContInstr.size(); ++i)
699 18 : ContInstr[i]->eraseFromParent();
700 :
701 : // TODO to fix up jump table so later phase won't be confused. if
702 : // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
703 : // there isn't such an interface yet. alternatively, replace all the other
704 : // blocks in the jump table with the entryBlk //}
705 2297 : }
706 :
707 2297 : bool AMDGPUCFGStructurizer::prepare() {
708 : bool Changed = false;
709 :
710 : //FIXME: if not reducible flow graph, make it so ???
711 :
712 : LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
713 :
714 2297 : orderBlocks(FuncRep);
715 :
716 : SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
717 :
718 : // Add an ExitBlk to loop that don't have one
719 2297 : for (MachineLoopInfo::iterator It = MLI->begin(),
720 2313 : E = MLI->end(); It != E; ++It) {
721 16 : MachineLoop *LoopRep = (*It);
722 : MBBVector ExitingMBBs;
723 16 : LoopRep->getExitingBlocks(ExitingMBBs);
724 :
725 16 : if (ExitingMBBs.size() == 0) {
726 0 : MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
727 0 : if (DummyExitBlk)
728 0 : RetBlks.push_back(DummyExitBlk);
729 : }
730 : }
731 :
732 : // Remove unconditional branch instr.
733 : // Add dummy exit block iff there are multiple returns.
734 2422 : for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
735 4719 : It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
736 2422 : MachineBasicBlock *MBB = *It;
737 2422 : removeUnconditionalBranch(MBB);
738 2422 : removeRedundantConditionalBranch(MBB);
739 2422 : if (isReturnBlock(MBB)) {
740 2298 : RetBlks.push_back(MBB);
741 : }
742 : assert(MBB->succ_size() <= 2);
743 : }
744 :
745 2297 : if (RetBlks.size() >= 2) {
746 1 : addDummyExitBlock(RetBlks);
747 : Changed = true;
748 : }
749 :
750 2297 : return Changed;
751 : }
752 :
753 2297 : bool AMDGPUCFGStructurizer::run() {
754 : //Assume reducible CFG...
755 : LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
756 :
757 : #ifdef STRESSTEST
758 : //Use the worse block ordering to test the algorithm.
759 : ReverseVector(orderedBlks);
760 : #endif
761 :
762 : LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
763 : int NumIter = 0;
764 : bool Finish = false;
765 : MachineBasicBlock *MBB;
766 : bool MakeProgress = false;
767 2297 : int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
768 : OrderedBlks.end());
769 :
770 : do {
771 : ++NumIter;
772 : LLVM_DEBUG(dbgs() << "numIter = " << NumIter
773 : << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
774 :
775 : SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
776 : OrderedBlks.begin();
777 : SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
778 : OrderedBlks.end();
779 :
780 : SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
781 : It;
782 : MachineBasicBlock *SccBeginMBB = nullptr;
783 : int SccNumBlk = 0; // The number of active blocks, init to a
784 : // maximum possible number.
785 : int SccNumIter; // Number of iteration in this SCC.
786 :
787 4719 : while (It != E) {
788 2422 : MBB = *It;
789 :
790 2422 : if (!SccBeginMBB) {
791 : SccBeginIter = It;
792 : SccBeginMBB = MBB;
793 : SccNumIter = 0;
794 : SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
795 : LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
796 : dbgs() << "\n";);
797 : }
798 :
799 2422 : if (!isRetiredBlock(MBB))
800 : patternMatch(MBB);
801 :
802 2422 : ++It;
803 :
804 : bool ContNextScc = true;
805 : if (It == E
806 2672 : || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
807 : // Just finish one scc.
808 : ++SccNumIter;
809 2402 : int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
810 2402 : if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
811 : LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
812 : << ", sccNumIter = " << SccNumIter;
813 : dbgs() << "doesn't make any progress\n";);
814 : ContNextScc = true;
815 2402 : } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
816 : SccNumBlk = sccRemainedNumBlk;
817 : It = SccBeginIter;
818 : ContNextScc = false;
819 : LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
820 : << "sccNumIter = " << SccNumIter << '\n';);
821 : } else {
822 : // Finish the current scc.
823 : ContNextScc = true;
824 : }
825 : } else {
826 : // Continue on next component in the current scc.
827 : ContNextScc = false;
828 : }
829 :
830 2422 : if (ContNextScc)
831 : SccBeginMBB = nullptr;
832 : } //while, "one iteration" over the function.
833 :
834 : MachineBasicBlock *EntryMBB =
835 2297 : *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
836 2297 : if (EntryMBB->succ_size() == 0) {
837 : Finish = true;
838 : LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
839 : } else {
840 : int NewnumRemainedBlk
841 0 : = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
842 : // consider cloned blocks ??
843 0 : if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
844 : MakeProgress = true;
845 : NumRemainedBlk = NewnumRemainedBlk;
846 : } else {
847 : MakeProgress = false;
848 : LLVM_DEBUG(dbgs() << "No progress\n";);
849 : }
850 : }
851 2297 : } while (!Finish && MakeProgress);
852 :
853 : // Misc wrap up to maintain the consistency of the Function representation.
854 4594 : wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
855 :
856 : // Detach retired Block, release memory.
857 : for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
858 4721 : It != E; ++It) {
859 2424 : if ((*It).second && (*It).second->IsRetired) {
860 : assert(((*It).first)->getNumber() != -1);
861 : LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
862 127 : (*It).first->eraseFromParent(); //Remove from the parent Function.
863 : }
864 2424 : delete (*It).second;
865 : }
866 : BlockInfoMap.clear();
867 : LLInfoMap.clear();
868 :
869 2297 : if (!Finish) {
870 : LLVM_DEBUG(FuncRep->viewCFG());
871 0 : report_fatal_error("IRREDUCIBLE_CFG");
872 : }
873 :
874 2297 : return true;
875 : }
876 :
877 2297 : void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
878 : int SccNum = 0;
879 : MachineBasicBlock *MBB;
880 4699 : for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
881 : ++It, ++SccNum) {
882 : const std::vector<MachineBasicBlock *> &SccNext = *It;
883 : for (std::vector<MachineBasicBlock *>::const_iterator
884 : blockIter = SccNext.begin(), blockEnd = SccNext.end();
885 4824 : blockIter != blockEnd; ++blockIter) {
886 2422 : MBB = *blockIter;
887 2422 : OrderedBlks.push_back(MBB);
888 2422 : recordSccnum(MBB, SccNum);
889 : }
890 : }
891 :
892 : // walk through all the block in func to check for unreachable
893 4719 : for (auto *MBB : nodes(MF)) {
894 0 : SccNum = getSCCNum(MBB);
895 2422 : if (SccNum == INVALIDSCCNUM)
896 0 : dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
897 : }
898 2297 : }
899 :
900 : int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
901 : int NumMatch = 0;
902 : int CurMatch;
903 :
904 : LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
905 :
906 2522 : while ((CurMatch = patternMatchGroup(MBB)) > 0)
907 : NumMatch += CurMatch;
908 :
909 : LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
910 : << ", numMatch = " << NumMatch << "\n";);
911 :
912 : return NumMatch;
913 : }
914 :
915 2522 : int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
916 : int NumMatch = 0;
917 2522 : NumMatch += loopendPatternMatch();
918 2522 : NumMatch += serialPatternMatch(MBB);
919 2522 : NumMatch += ifPatternMatch(MBB);
920 2522 : return NumMatch;
921 : }
922 :
923 2654 : int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
924 2654 : if (MBB->succ_size() != 1)
925 : return 0;
926 :
927 204 : MachineBasicBlock *childBlk = *MBB->succ_begin();
928 204 : if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
929 124 : return 0;
930 :
931 80 : mergeSerialBlock(MBB, childBlk);
932 : ++numSerialPatternMatch;
933 80 : return 1;
934 : }
935 :
936 2650 : int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
937 : //two edges
938 2650 : if (MBB->succ_size() != 2)
939 : return 0;
940 48 : if (hasBackEdge(MBB))
941 : return 0;
942 48 : MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
943 48 : if (!BranchMI)
944 : return 0;
945 :
946 : assert(isCondBranch(BranchMI));
947 : int NumMatch = 0;
948 :
949 : MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
950 48 : NumMatch += serialPatternMatch(TrueMBB);
951 48 : NumMatch += ifPatternMatch(TrueMBB);
952 : MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
953 48 : NumMatch += serialPatternMatch(FalseMBB);
954 48 : NumMatch += ifPatternMatch(FalseMBB);
955 : MachineBasicBlock *LandBlk;
956 : int Cloned = 0;
957 :
958 : assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
959 : // TODO: Simplify
960 15 : if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
961 63 : && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
962 : // Diamond pattern
963 : LandBlk = *TrueMBB->succ_begin();
964 45 : } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
965 : // Triangle pattern, false is empty
966 : LandBlk = FalseMBB;
967 : FalseMBB = nullptr;
968 : } else if (FalseMBB->succ_size() == 1
969 45 : && *FalseMBB->succ_begin() == TrueMBB) {
970 : // Triangle pattern, true is empty
971 : // We reverse the predicate to make a triangle, empty false pattern;
972 : std::swap(TrueMBB, FalseMBB);
973 41 : reversePredicateSetter(MBB->end(), *MBB);
974 : LandBlk = FalseMBB;
975 : FalseMBB = nullptr;
976 : } else if (FalseMBB->succ_size() == 1
977 4 : && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
978 0 : LandBlk = *FalseMBB->succ_begin();
979 : } else if (TrueMBB->succ_size() == 1
980 4 : && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
981 0 : LandBlk = *TrueMBB->succ_begin();
982 : } else {
983 4 : return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
984 : }
985 :
986 : // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
987 : // new BB created for landBlk==NULL may introduce new challenge to the
988 : // reduction process.
989 44 : if (LandBlk &&
990 44 : ((TrueMBB && TrueMBB->pred_size() > 1)
991 43 : || (FalseMBB && FalseMBB->pred_size() > 1))) {
992 1 : Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
993 : }
994 :
995 44 : if (TrueMBB && TrueMBB->pred_size() > 1) {
996 1 : TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
997 1 : ++Cloned;
998 : }
999 :
1000 44 : if (FalseMBB && FalseMBB->pred_size() > 1) {
1001 0 : FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1002 0 : ++Cloned;
1003 : }
1004 :
1005 44 : mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1006 :
1007 : ++numIfPatternMatch;
1008 :
1009 : numClonedBlock += Cloned;
1010 :
1011 44 : return 1 + Cloned + NumMatch;
1012 : }
1013 :
1014 2522 : int AMDGPUCFGStructurizer::loopendPatternMatch() {
1015 : std::deque<MachineLoop *> NestedLoops;
1016 2626 : for (auto &It: *MLI)
1017 324 : for (MachineLoop *ML : depth_first(It))
1018 : NestedLoops.push_front(ML);
1019 :
1020 2522 : if (NestedLoops.empty())
1021 : return 0;
1022 :
1023 : // Process nested loop outside->inside (we did push_front),
1024 : // so "continue" to a outside loop won't be mistaken as "break"
1025 : // of the current loop.
1026 : int Num = 0;
1027 220 : for (MachineLoop *ExaminedLoop : NestedLoops) {
1028 116 : if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1029 98 : continue;
1030 : LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1031 18 : int NumBreak = mergeLoop(ExaminedLoop);
1032 18 : if (NumBreak == -1)
1033 : break;
1034 18 : Num += NumBreak;
1035 : }
1036 104 : return Num;
1037 : }
1038 :
1039 18 : int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1040 18 : MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1041 : MBBVector ExitingMBBs;
1042 18 : LoopRep->getExitingBlocks(ExitingMBBs);
1043 : assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1044 : LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
1045 : << " exiting blocks\n";);
1046 : // We assume a single ExitBlk
1047 : MBBVector ExitBlks;
1048 18 : LoopRep->getExitBlocks(ExitBlks);
1049 : SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1050 36 : for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1051 36 : ExitBlkSet.insert(ExitBlks[i]);
1052 : assert(ExitBlkSet.size() == 1);
1053 18 : MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1054 : assert(ExitBlk && "Loop has several exit block");
1055 : MBBVector LatchBlks;
1056 54 : for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1057 36 : if (LoopRep->contains(LB))
1058 18 : LatchBlks.push_back(LB);
1059 :
1060 36 : for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1061 36 : mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1062 36 : for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1063 36 : settleLoopcontBlock(LatchBlks[i], LoopHeader);
1064 : int Match = 0;
1065 : do {
1066 : Match = 0;
1067 28 : Match += serialPatternMatch(LoopHeader);
1068 28 : Match += ifPatternMatch(LoopHeader);
1069 28 : } while (Match > 0);
1070 18 : mergeLooplandBlock(LoopHeader, ExitBlk);
1071 18 : MachineLoop *ParentLoop = LoopRep->getParentLoop();
1072 18 : if (ParentLoop)
1073 2 : MLI->changeLoopFor(LoopHeader, ParentLoop);
1074 : else
1075 16 : MLI->removeBlock(LoopHeader);
1076 18 : Visited[LoopRep] = true;
1077 18 : return 1;
1078 : }
1079 :
1080 8 : bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1081 : MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1082 8 : if (Src1MBB->succ_size() == 0) {
1083 0 : MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1084 0 : if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1085 0 : MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1086 0 : if (TheEntry) {
1087 : LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
1088 : << Src1MBB->getNumber() << " src2 = BB"
1089 : << Src2MBB->getNumber() << "\n";);
1090 0 : return true;
1091 : }
1092 : }
1093 : }
1094 : return false;
1095 : }
1096 :
1097 4 : int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1098 : MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1099 4 : int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1100 4 : if (Num == 0) {
1101 : LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
1102 : << "\n";);
1103 0 : Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1104 : }
1105 4 : return Num;
1106 : }
1107 :
1108 4 : int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1109 : MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1110 : int Num = 0;
1111 : MachineBasicBlock *DownBlk;
1112 :
1113 : //trueBlk could be the common post dominator
1114 : DownBlk = TrueMBB;
1115 :
1116 : LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1117 : << " true = BB" << TrueMBB->getNumber()
1118 : << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
1119 : << FalseMBB->getNumber() << "\n";);
1120 :
1121 4 : while (DownBlk) {
1122 : LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););
1123 :
1124 4 : if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1125 : LLVM_DEBUG(dbgs() << " working\n";);
1126 :
1127 4 : Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1128 4 : Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1129 :
1130 : numClonedBlock += Num;
1131 4 : Num += serialPatternMatch(*HeadMBB->succ_begin());
1132 4 : Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1133 4 : Num += ifPatternMatch(HeadMBB);
1134 : assert(Num > 0);
1135 :
1136 4 : break;
1137 : }
1138 : LLVM_DEBUG(dbgs() << " not working\n";);
1139 0 : DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1140 : } // walk down the postDomTree
1141 :
1142 4 : return Num;
1143 : }
1144 :
1145 : #ifndef NDEBUG
1146 : void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1147 : MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1148 : MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1149 : dbgs() << "head = BB" << HeadMBB->getNumber()
1150 : << " size = " << HeadMBB->size();
1151 : if (Detail) {
1152 : dbgs() << "\n";
1153 : HeadMBB->print(dbgs());
1154 : dbgs() << "\n";
1155 : }
1156 :
1157 : if (TrueMBB) {
1158 : dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1159 : << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1160 : if (Detail) {
1161 : dbgs() << "\n";
1162 : TrueMBB->print(dbgs());
1163 : dbgs() << "\n";
1164 : }
1165 : }
1166 : if (FalseMBB) {
1167 : dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1168 : << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1169 : if (Detail) {
1170 : dbgs() << "\n";
1171 : FalseMBB->print(dbgs());
1172 : dbgs() << "\n";
1173 : }
1174 : }
1175 : if (LandMBB) {
1176 : dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1177 : << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1178 : if (Detail) {
1179 : dbgs() << "\n";
1180 : LandMBB->print(dbgs());
1181 : dbgs() << "\n";
1182 : }
1183 : }
1184 :
1185 : dbgs() << "\n";
1186 : }
1187 : #endif
1188 :
1189 0 : int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1190 : MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1191 : MachineBasicBlock **LandMBBPtr) {
1192 : bool MigrateTrue = false;
1193 : bool MigrateFalse = false;
1194 :
1195 0 : MachineBasicBlock *LandBlk = *LandMBBPtr;
1196 :
1197 : assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1198 : && (!FalseMBB || FalseMBB->succ_size() <= 1));
1199 :
1200 0 : if (TrueMBB == FalseMBB)
1201 0 : return 0;
1202 :
1203 0 : MigrateTrue = needMigrateBlock(TrueMBB);
1204 0 : MigrateFalse = needMigrateBlock(FalseMBB);
1205 :
1206 0 : if (!MigrateTrue && !MigrateFalse)
1207 0 : return 0;
1208 :
1209 : // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1210 : // have more than one predecessors. without doing this, its predecessor
1211 : // rather than headBlk will have undefined value in initReg.
1212 0 : if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1213 : MigrateTrue = true;
1214 0 : if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1215 : MigrateFalse = true;
1216 :
1217 : LLVM_DEBUG(
1218 : dbgs() << "before improveSimpleJumpintoIf: ";
1219 : showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1220 :
1221 : // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1222 : //
1223 : // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1224 : // {initReg = 0; org falseBlk branch }
1225 : // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1226 : // => org landBlk
1227 : // if landBlk->pred_size() > 2, put the about if-else inside
1228 : // if (initReg !=2) {...}
1229 : //
1230 : // add initReg = initVal to headBlk
1231 :
1232 0 : const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1233 0 : if (!MigrateTrue || !MigrateFalse) {
1234 : // XXX: We have an opportunity here to optimize the "branch into if" case
1235 : // here. Branch into if looks like this:
1236 : // entry
1237 : // / |
1238 : // diamond_head branch_from
1239 : // / \ |
1240 : // diamond_false diamond_true
1241 : // \ /
1242 : // done
1243 : //
1244 : // The diamond_head block begins the "if" and the diamond_true block
1245 : // is the block being "branched into".
1246 : //
1247 : // If MigrateTrue is true, then TrueBB is the block being "branched into"
1248 : // and if MigrateFalse is true, then FalseBB is the block being
1249 : // "branched into"
1250 : //
1251 : // Here is the pseudo code for how I think the optimization should work:
1252 : // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1253 : // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1254 : // 3. Move the branch instruction from diamond_head into its own basic
1255 : // block (new_block).
1256 : // 4. Add an unconditional branch from diamond_head to new_block
1257 : // 5. Replace the branch instruction in branch_from with an unconditional
1258 : // branch to new_block. If branch_from has multiple predecessors, then
1259 : // we need to replace the True/False block in the branch
1260 : // instruction instead of replacing it.
1261 : // 6. Change the condition of the branch instruction in new_block from
1262 : // COND to (COND || GPR0)
1263 : //
1264 : // In order insert these MOV instruction, we will need to use the
1265 : // RegisterScavenger. Usually liveness stops being tracked during
1266 : // the late machine optimization passes, however if we implement
1267 : // bool TargetRegisterInfo::requiresRegisterScavenging(
1268 : // const MachineFunction &MF)
1269 : // and have it return true, liveness will be tracked correctly
1270 : // by generic optimization passes. We will also need to make sure that
1271 : // all of our target-specific passes that run after regalloc and before
1272 : // the CFGStructurizer track liveness and we will need to modify this pass
1273 : // to correctly track liveness.
1274 : //
1275 : // After the above changes, the new CFG should look like this:
1276 : // entry
1277 : // / |
1278 : // diamond_head branch_from
1279 : // \ /
1280 : // new_block
1281 : // / |
1282 : // diamond_false diamond_true
1283 : // \ /
1284 : // done
1285 : //
1286 : // Without this optimization, we are forced to duplicate the diamond_true
1287 : // block and we will end up with a CFG like this:
1288 : //
1289 : // entry
1290 : // / |
1291 : // diamond_head branch_from
1292 : // / \ |
1293 : // diamond_false diamond_true diamond_true (duplicate)
1294 : // \ / |
1295 : // done --------------------|
1296 : //
1297 : // Duplicating diamond_true can be very costly especially if it has a
1298 : // lot of instructions.
1299 0 : return 0;
1300 : }
1301 :
1302 : int NumNewBlk = 0;
1303 :
1304 : bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1305 :
1306 : //insert R600::ENDIF to avoid special case "input landBlk == NULL"
1307 0 : MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
1308 :
1309 0 : if (LandBlkHasOtherPred) {
1310 0 : report_fatal_error("Extra register needed to handle CFG");
1311 : unsigned CmpResReg =
1312 : HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1313 : report_fatal_error("Extra compare instruction needed to handle CFG");
1314 : insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
1315 : CmpResReg, DebugLoc());
1316 : }
1317 :
1318 : // XXX: We are running this after RA, so creating virtual registers will
1319 : // cause an assertion failure in the PostRA scheduling pass.
1320 : unsigned InitReg =
1321 0 : HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1322 0 : insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
1323 0 : DebugLoc());
1324 :
1325 : if (MigrateTrue) {
1326 0 : migrateInstruction(TrueMBB, LandBlk, I);
1327 : // need to uncondionally insert the assignment to ensure a path from its
1328 : // predecessor rather than headBlk has valid value in initReg if
1329 : // (initVal != 1).
1330 0 : report_fatal_error("Extra register needed to handle CFG");
1331 : }
1332 : insertInstrBefore(I, R600::ELSE);
1333 :
1334 : if (MigrateFalse) {
1335 : migrateInstruction(FalseMBB, LandBlk, I);
1336 : // need to uncondionally insert the assignment to ensure a path from its
1337 : // predecessor rather than headBlk has valid value in initReg if
1338 : // (initVal != 0)
1339 : report_fatal_error("Extra register needed to handle CFG");
1340 : }
1341 :
1342 : if (LandBlkHasOtherPred) {
1343 : // add endif
1344 : insertInstrBefore(I, R600::ENDIF);
1345 :
1346 : // put initReg = 2 to other predecessors of landBlk
1347 : for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1348 : PE = LandBlk->pred_end(); PI != PE; ++PI) {
1349 : MachineBasicBlock *MBB = *PI;
1350 : if (MBB != TrueMBB && MBB != FalseMBB)
1351 : report_fatal_error("Extra register needed to handle CFG");
1352 : }
1353 : }
1354 : LLVM_DEBUG(
1355 : dbgs() << "result from improveSimpleJumpintoIf: ";
1356 : showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1357 :
1358 : // update landBlk
1359 : *LandMBBPtr = LandBlk;
1360 :
1361 : return NumNewBlk;
1362 : }
1363 :
1364 80 : void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1365 : MachineBasicBlock *SrcMBB) {
1366 : LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
1367 : << SrcMBB->getNumber() << "\n";);
1368 : DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1369 :
1370 80 : DstMBB->removeSuccessor(SrcMBB, true);
1371 : cloneSuccessorList(DstMBB, SrcMBB);
1372 :
1373 80 : removeSuccessor(SrcMBB);
1374 80 : MLI->removeBlock(SrcMBB);
1375 80 : retireBlock(SrcMBB);
1376 80 : }
1377 :
1378 44 : void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1379 : MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1380 : MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1381 : assert (TrueMBB);
1382 : LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ ";
1383 : if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
1384 : << " } else ";
1385 : dbgs() << "{ "; if (FalseMBB) {
1386 : dbgs() << "BB" << FalseMBB->getNumber();
1387 : } dbgs() << " }\n ";
1388 : dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
1389 : dbgs() << "BB" << LandMBB->getNumber();
1390 : } dbgs() << "\n";);
1391 :
1392 44 : int OldOpcode = BranchMI->getOpcode();
1393 : DebugLoc BranchDL = BranchMI->getDebugLoc();
1394 :
1395 : // transform to
1396 : // if cond
1397 : // trueBlk
1398 : // else
1399 : // falseBlk
1400 : // endif
1401 : // landBlk
1402 :
1403 : MachineBasicBlock::iterator I = BranchMI;
1404 44 : insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1405 : BranchDL);
1406 :
1407 44 : if (TrueMBB) {
1408 : MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1409 44 : MBB->removeSuccessor(TrueMBB, true);
1410 44 : if (LandMBB && TrueMBB->succ_size()!=0)
1411 44 : TrueMBB->removeSuccessor(LandMBB, true);
1412 44 : retireBlock(TrueMBB);
1413 44 : MLI->removeBlock(TrueMBB);
1414 : }
1415 :
1416 44 : if (FalseMBB) {
1417 3 : insertInstrBefore(I, R600::ELSE);
1418 : MBB->splice(I, FalseMBB, FalseMBB->begin(),
1419 : FalseMBB->end());
1420 3 : MBB->removeSuccessor(FalseMBB, true);
1421 3 : if (LandMBB && FalseMBB->succ_size() != 0)
1422 3 : FalseMBB->removeSuccessor(LandMBB, true);
1423 3 : retireBlock(FalseMBB);
1424 3 : MLI->removeBlock(FalseMBB);
1425 : }
1426 44 : insertInstrBefore(I, R600::ENDIF);
1427 :
1428 44 : BranchMI->eraseFromParent();
1429 :
1430 44 : if (LandMBB && TrueMBB && FalseMBB)
1431 3 : MBB->addSuccessor(LandMBB);
1432 44 : }
1433 :
1434 18 : void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1435 : MachineBasicBlock *LandMBB) {
1436 : LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1437 : << " land = BB" << LandMBB->getNumber() << "\n";);
1438 :
1439 18 : insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
1440 18 : insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
1441 18 : DstBlk->replaceSuccessor(DstBlk, LandMBB);
1442 18 : }
1443 :
1444 18 : void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1445 : MachineBasicBlock *LandMBB) {
1446 : LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
1447 : << ExitingMBB->getNumber() << " land = BB"
1448 : << LandMBB->getNumber() << "\n";);
1449 18 : MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1450 : assert(BranchMI && isCondBranch(BranchMI));
1451 : DebugLoc DL = BranchMI->getDebugLoc();
1452 : MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1453 : MachineBasicBlock::iterator I = BranchMI;
1454 18 : if (TrueBranch != LandMBB)
1455 18 : reversePredicateSetter(I, *I->getParent());
1456 18 : insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
1457 18 : insertInstrBefore(I, R600::BREAK);
1458 18 : insertInstrBefore(I, R600::ENDIF);
1459 : //now branchInst can be erase safely
1460 18 : BranchMI->eraseFromParent();
1461 : //now take care of successors, retire blocks
1462 18 : ExitingMBB->removeSuccessor(LandMBB, true);
1463 18 : }
1464 :
1465 18 : void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1466 : MachineBasicBlock *ContMBB) {
1467 : LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1468 : << ContingMBB->getNumber() << ", cont = BB"
1469 : << ContMBB->getNumber() << "\n";);
1470 :
1471 18 : MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1472 18 : if (MI) {
1473 : assert(isCondBranch(MI));
1474 : MachineBasicBlock::iterator I = MI;
1475 : MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1476 0 : int OldOpcode = MI->getOpcode();
1477 : DebugLoc DL = MI->getDebugLoc();
1478 :
1479 : bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1480 :
1481 0 : if (!UseContinueLogical) {
1482 : int BranchOpcode =
1483 0 : TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1484 : getBranchZeroOpcode(OldOpcode);
1485 0 : insertCondBranchBefore(I, BranchOpcode, DL);
1486 : // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1487 0 : insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
1488 0 : insertInstrEnd(ContingMBB, R600::ENDIF, DL);
1489 : } else {
1490 : int BranchOpcode =
1491 0 : TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1492 : getContinueZeroOpcode(OldOpcode);
1493 0 : insertCondBranchBefore(I, BranchOpcode, DL);
1494 : }
1495 :
1496 0 : MI->eraseFromParent();
1497 : } else {
1498 : // if we've arrived here then we've already erased the branch instruction
1499 : // travel back up the basic block to see the last reference of our debug
1500 : // location we've just inserted that reference here so it should be
1501 : // representative insertEnd to ensure phi-moves, if exist, go before the
1502 : // continue-instr.
1503 18 : insertInstrEnd(ContingMBB, R600::CONTINUE,
1504 36 : getLastDebugLocInBB(ContingMBB));
1505 : }
1506 18 : }
1507 :
1508 8 : int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1509 : MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1510 : int Cloned = 0;
1511 : assert(PreMBB->isSuccessor(SrcMBB));
1512 18 : while (SrcMBB && SrcMBB != DstMBB) {
1513 : assert(SrcMBB->succ_size() == 1);
1514 10 : if (SrcMBB->pred_size() > 1) {
1515 0 : SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1516 0 : ++Cloned;
1517 : }
1518 :
1519 : PreMBB = SrcMBB;
1520 10 : SrcMBB = *SrcMBB->succ_begin();
1521 : }
1522 :
1523 8 : return Cloned;
1524 : }
1525 :
1526 : MachineBasicBlock *
1527 1 : AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1528 : MachineBasicBlock *PredMBB) {
1529 : assert(PredMBB->isSuccessor(MBB) &&
1530 : "succBlk is not a prececessor of curBlk");
1531 :
1532 1 : MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
1533 1 : replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1534 : //srcBlk, oldBlk, newBlk
1535 :
1536 1 : PredMBB->replaceSuccessor(MBB, CloneMBB);
1537 :
1538 : // add all successor to cloneBlk
1539 : cloneSuccessorList(CloneMBB, MBB);
1540 :
1541 : numClonedInstr += MBB->size();
1542 :
1543 : LLVM_DEBUG(dbgs() << "Cloned block: "
1544 : << "BB" << MBB->getNumber() << "size " << MBB->size()
1545 : << "\n";);
1546 :
1547 : SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1548 :
1549 1 : return CloneMBB;
1550 : }
1551 :
1552 0 : void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1553 : MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
1554 : MachineBasicBlock::iterator SpliceEnd;
1555 : //look for the input branchinstr, not the AMDGPU branchinstr
1556 0 : MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1557 0 : if (!BranchMI) {
1558 : LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
1559 : SpliceEnd = SrcMBB->end();
1560 : } else {
1561 : LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1562 : SpliceEnd = BranchMI;
1563 : }
1564 : LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
1565 : << DstMBB->size() << "srcSize = " << SrcMBB->size()
1566 : << "\n";);
1567 :
1568 : //splice insert before insertPos
1569 : DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1570 :
1571 : LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
1572 : << DstMBB->size() << "srcSize = " << SrcMBB->size()
1573 : << '\n';);
1574 0 : }
1575 :
1576 : MachineBasicBlock *
1577 0 : AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1578 : MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1579 0 : MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1580 :
1581 0 : if (!LoopHeader || !LoopLatch)
1582 : return nullptr;
1583 0 : MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1584 : // Is LoopRep an infinite loop ?
1585 0 : if (!BranchMI || !isUncondBranch(BranchMI))
1586 : return nullptr;
1587 :
1588 0 : MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1589 0 : FuncRep->push_back(DummyExitBlk); //insert to function
1590 : SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1591 : LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1592 0 : LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
1593 0 : Ctx.emitError("Extra register needed to handle CFG");
1594 0 : return nullptr;
1595 : }
1596 :
1597 2422 : void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1598 : MachineInstr *BranchMI;
1599 :
1600 : // I saw two unconditional branch in one basic block in example
1601 : // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1602 2423 : while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1603 2423 : && isUncondBranch(BranchMI)) {
1604 : LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1605 1 : BranchMI->eraseFromParent();
1606 : }
1607 2422 : }
1608 :
1609 0 : void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1610 : MachineBasicBlock *MBB) {
1611 0 : if (MBB->succ_size() != 2)
1612 0 : return;
1613 0 : MachineBasicBlock *MBB1 = *MBB->succ_begin();
1614 0 : MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1615 0 : if (MBB1 != MBB2)
1616 0 : return;
1617 :
1618 0 : MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1619 : assert(BranchMI && isCondBranch(BranchMI));
1620 : LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1621 0 : BranchMI->eraseFromParent();
1622 : SHOWNEWBLK(MBB1, "Removing redundant successor");
1623 0 : MBB->removeSuccessor(MBB1, true);
1624 : }
1625 :
1626 1 : void AMDGPUCFGStructurizer::addDummyExitBlock(
1627 : SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
1628 1 : MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1629 1 : FuncRep->push_back(DummyExitBlk); //insert to function
1630 2 : insertInstrEnd(DummyExitBlk, R600::RETURN);
1631 :
1632 2 : for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1633 3 : E = RetMBB.end(); It != E; ++It) {
1634 2 : MachineBasicBlock *MBB = *It;
1635 2 : MachineInstr *MI = getReturnInstr(MBB);
1636 2 : if (MI)
1637 1 : MI->eraseFromParent();
1638 2 : MBB->addSuccessor(DummyExitBlk);
1639 : LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1640 : << " successors\n";);
1641 : }
1642 : SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1643 1 : }
1644 :
1645 0 : void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1646 0 : while (MBB->succ_size())
1647 0 : MBB->removeSuccessor(*MBB->succ_begin());
1648 0 : }
1649 :
1650 2422 : void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1651 : int SccNum) {
1652 2422 : BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1653 2422 : if (!srcBlkInfo)
1654 2422 : srcBlkInfo = new BlockInformation();
1655 2422 : srcBlkInfo->SccNum = SccNum;
1656 2422 : }
1657 :
1658 127 : void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1659 : LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
1660 :
1661 127 : BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1662 :
1663 127 : if (!SrcBlkInfo)
1664 2 : SrcBlkInfo = new BlockInformation();
1665 :
1666 127 : SrcBlkInfo->IsRetired = true;
1667 : assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1668 : && "can't retire block yet");
1669 127 : }
1670 :
1671 282 : INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1672 : "AMDGPU CFG Structurizer", false, false)
1673 282 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1674 282 : INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
1675 282 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1676 564 : INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1677 : "AMDGPU CFG Structurizer", false, false)
1678 :
1679 282 : FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
1680 282 : return new AMDGPUCFGStructurizer();
1681 : }
|