LLVM 4.0.0
R600ControlFlowFinalizer.cpp
1 //===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This pass turns all control flow pseudo instructions into native ones,
12 /// computing their address on the fly; it also sets the STACK_SIZE info.
13 //===----------------------------------------------------------------------===//
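// For example, an IF_PREDICATE_SET / ELSE / ENDIF pseudo sequence is rewritten
// into native CF_JUMP / CF_ELSE / POP instructions (or a CF_ALU_POP_AFTER when
// the pop can be folded into the preceding ALU clause), with their address
// operands filled in from the running CF instruction count (CfCount).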
14 
15 #include "llvm/Support/Debug.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "r600cf"
30 
31 namespace {
32 
33 struct CFStack {
34 
35  enum StackItem {
36  ENTRY = 0,
37  SUB_ENTRY = 1,
38  FIRST_NON_WQM_PUSH = 2,
39  FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
40  };
41 
42  const R600Subtarget *ST;
43  std::vector<StackItem> BranchStack;
44  std::vector<StackItem> LoopStack;
45  unsigned MaxStackSize;
46  unsigned CurrentEntries;
47  unsigned CurrentSubEntries;
48 
49  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
50  // We need to reserve a stack entry for CALL_FS in vertex shaders.
51  MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
52  CurrentEntries(0), CurrentSubEntries(0) { }
53 
54  unsigned getLoopDepth();
55  bool branchStackContains(CFStack::StackItem);
56  bool requiresWorkAroundForInst(unsigned Opcode);
57  unsigned getSubEntrySize(CFStack::StackItem Item);
58  void updateMaxStackSize();
59  void pushBranch(unsigned Opcode, bool isWQM = false);
60  void pushLoop();
61  void popBranch();
62  void popLoop();
63 };
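// CurrentEntries counts whole stack entries (loops and WQM branch pushes),
// while CurrentSubEntries accumulates the fractional cost of non-WQM pushes;
// updateMaxStackSize() rounds the sub-entries up in groups of four to whole
// entries when computing the stack size.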
64 
65 unsigned CFStack::getLoopDepth() {
66  return LoopStack.size();
67 }
68 
69 bool CFStack::branchStackContains(CFStack::StackItem Item) {
70  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
71  E = BranchStack.end(); I != E; ++I) {
72  if (*I == Item)
73  return true;
74  }
75  return false;
76 }
77 
78 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
79  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
80  getLoopDepth() > 1)
81  return true;
82 
83  if (!ST->hasCFAluBug())
84  return false;
85 
86  switch(Opcode) {
87  default: return false;
88  case AMDGPU::CF_ALU_PUSH_BEFORE:
89  case AMDGPU::CF_ALU_ELSE_AFTER:
90  case AMDGPU::CF_ALU_BREAK:
91  case AMDGPU::CF_ALU_CONTINUE:
92  if (CurrentSubEntries == 0)
93  return false;
94  if (ST->getWavefrontSize() == 64) {
95  // We are being conservative here. We only require this work-around if
96  // CurrentSubEntries > 3 &&
97  // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
98  //
99  // We have to be conservative, because we don't know for certain that
100  // our stack allocation algorithm for Evergreen/NI is correct. Applying this
101  // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
102  // resources without any problems.
103  return CurrentSubEntries > 3;
104  } else {
105  assert(ST->getWavefrontSize() == 32);
106  // We are being conservative here. We only require the work-around if
107  // CurrentSubEntries > 7 &&
108  // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
109  // See the comment on the wavefront size == 64 case for why we are
110  // being conservative.
111  return CurrentSubEntries > 7;
112  }
113  }
114 }
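// For example, on a wavefront-64 target with the CF_ALU bug, a
// CF_ALU_PUSH_BEFORE seen while CurrentSubEntries >= 4 requires the
// work-around; with three or fewer outstanding sub-entries it does not.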
115 
116 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
117  switch(Item) {
118  default:
119  return 0;
120  case CFStack::FIRST_NON_WQM_PUSH:
121  assert(!ST->hasCaymanISA());
122  if (ST->getGeneration() <= R600Subtarget::R700) {
123  // +1 For the push operation.
124  // +2 Extra space required.
125  return 3;
126  } else {
127  // Some documentation says that this is not necessary on Evergreen,
128  // but experimentation has shown that we need to allocate 1 extra
129  // sub-entry for the first non-WQM push.
130  // +1 For the push operation.
131  // +1 Extra space required.
132  return 2;
133  }
134  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
135  assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
136  // +1 For the push operation.
137  // +1 Extra space required.
138  return 2;
139  case CFStack::SUB_ENTRY:
140  return 1;
141  }
142 }
143 
144 void CFStack::updateMaxStackSize() {
145  unsigned CurrentStackSize =
146  CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
147  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
148 }
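// Worked example: with CurrentEntries == 2 and CurrentSubEntries == 5, the
// sub-entries round up to alignTo(5, 4) / 4 == 2 whole entries, giving a
// CurrentStackSize of 4 and raising MaxStackSize to 4 if it was smaller.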
149 
150 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
151  CFStack::StackItem Item = CFStack::ENTRY;
152  switch(Opcode) {
153  case AMDGPU::CF_PUSH_EG:
154  case AMDGPU::CF_ALU_PUSH_BEFORE:
155  if (!isWQM) {
156  if (!ST->hasCaymanISA() &&
157  !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
158  Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
159  // See comment in
160  // CFStack::getSubEntrySize()
161  else if (CurrentEntries > 0 &&
162  ST->getGeneration() > R600Subtarget::EVERGREEN &&
163  !ST->hasCaymanISA() &&
164  !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
165  Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
166  else
167  Item = CFStack::SUB_ENTRY;
168  } else
169  Item = CFStack::ENTRY;
170  break;
171  }
172  BranchStack.push_back(Item);
173  if (Item == CFStack::ENTRY)
174  CurrentEntries++;
175  else
176  CurrentSubEntries += getSubEntrySize(Item);
177  updateMaxStackSize();
178 }
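// For instance, the first non-WQM push on a pre-Evergreen (R600/R700) target
// is recorded as FIRST_NON_WQM_PUSH and costs 3 sub-entries (see
// getSubEntrySize()); later non-WQM pushes on that target cost a single
// SUB_ENTRY each.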
179 
180 void CFStack::pushLoop() {
181  LoopStack.push_back(CFStack::ENTRY);
182  CurrentEntries++;
183  updateMaxStackSize();
184 }
185 
186 void CFStack::popBranch() {
187  CFStack::StackItem Top = BranchStack.back();
188  if (Top == CFStack::ENTRY)
189  CurrentEntries--;
190  else
191  CurrentSubEntries -= getSubEntrySize(Top);
192  BranchStack.pop_back();
193 }
194 
195 void CFStack::popLoop() {
196  CurrentEntries--;
197  LoopStack.pop_back();
198 }
199 
200 class R600ControlFlowFinalizer : public MachineFunctionPass {
201 
202 private:
203  typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
204 
205  enum ControlFlowInstruction {
206  CF_TC,
207  CF_VC,
208  CF_CALL_FS,
209  CF_WHILE_LOOP,
210  CF_END_LOOP,
211  CF_LOOP_BREAK,
212  CF_LOOP_CONTINUE,
213  CF_JUMP,
214  CF_ELSE,
215  CF_POP,
216  CF_END
217  };
218 
219  static char ID;
220  const R600InstrInfo *TII;
221  const R600RegisterInfo *TRI;
222  unsigned MaxFetchInst;
223  const R600Subtarget *ST;
224 
225  bool IsTrivialInst(MachineInstr &MI) const {
226  switch (MI.getOpcode()) {
227  case AMDGPU::KILL:
228  case AMDGPU::RETURN:
229  return true;
230  default:
231  return false;
232  }
233  }
234 
235  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
236  unsigned Opcode = 0;
237  bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
238  switch (CFI) {
239  case CF_TC:
240  Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
241  break;
242  case CF_VC:
243  Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
244  break;
245  case CF_CALL_FS:
246  Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
247  break;
248  case CF_WHILE_LOOP:
249  Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
250  break;
251  case CF_END_LOOP:
252  Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
253  break;
254  case CF_LOOP_BREAK:
255  Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
256  break;
257  case CF_LOOP_CONTINUE:
258  Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
259  break;
260  case CF_JUMP:
261  Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
262  break;
263  case CF_ELSE:
264  Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
265  break;
266  case CF_POP:
267  Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
268  break;
269  case CF_END:
270  if (ST->hasCaymanISA()) {
271  Opcode = AMDGPU::CF_END_CM;
272  break;
273  }
274  Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
275  break;
276  }
277  assert (Opcode && "No opcode selected");
278  return TII->get(Opcode);
279  }
280 
281  bool isCompatibleWithClause(const MachineInstr &MI,
282  std::set<unsigned> &DstRegs) const {
283  unsigned DstMI, SrcMI;
284  for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
285  E = MI.operands_end();
286  I != E; ++I) {
287  const MachineOperand &MO = *I;
288  if (!MO.isReg())
289  continue;
290  if (MO.isDef()) {
291  unsigned Reg = MO.getReg();
292  if (AMDGPU::R600_Reg128RegClass.contains(Reg))
293  DstMI = Reg;
294  else
295  DstMI = TRI->getMatchingSuperReg(Reg,
296  TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
297  &AMDGPU::R600_Reg128RegClass);
298  }
299  if (MO.isUse()) {
300  unsigned Reg = MO.getReg();
301  if (AMDGPU::R600_Reg128RegClass.contains(Reg))
302  SrcMI = Reg;
303  else
304  SrcMI = TRI->getMatchingSuperReg(Reg,
305  TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
306  &AMDGPU::R600_Reg128RegClass);
307  }
308  }
309  if ((DstRegs.find(SrcMI) == DstRegs.end())) {
310  DstRegs.insert(DstMI);
311  return true;
312  } else
313  return false;
314  }
315 
316  ClauseFile
317  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
318  const {
319  MachineBasicBlock::iterator ClauseHead = I;
320  std::vector<MachineInstr *> ClauseContent;
321  unsigned AluInstCount = 0;
322  bool IsTex = TII->usesTextureCache(*ClauseHead);
323  std::set<unsigned> DstRegs;
324  for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
325  if (IsTrivialInst(*I))
326  continue;
327  if (AluInstCount >= MaxFetchInst)
328  break;
329  if ((IsTex && !TII->usesTextureCache(*I)) ||
330  (!IsTex && !TII->usesVertexCache(*I)))
331  break;
332  if (!isCompatibleWithClause(*I, DstRegs))
333  break;
334  AluInstCount ++;
335  ClauseContent.push_back(&*I);
336  }
337  MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
338  getHWInstrDesc(IsTex?CF_TC:CF_VC))
339  .addImm(0) // ADDR
340  .addImm(AluInstCount - 1); // COUNT
341  return ClauseFile(MIb, std::move(ClauseContent));
342  }
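// A fetch clause gathers at most MaxFetchInst consecutive fetches of the same
// kind (texture or vertex); the COUNT field of the emitted CF_TC/CF_VC holds
// the number of gathered fetch instructions minus one.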
343 
344  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
345  static const unsigned LiteralRegs[] = {
346  AMDGPU::ALU_LITERAL_X,
347  AMDGPU::ALU_LITERAL_Y,
348  AMDGPU::ALU_LITERAL_Z,
349  AMDGPU::ALU_LITERAL_W
350  };
351  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
352  TII->getSrcs(MI);
353  for (const auto &Src : Srcs) {
354  if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
355  continue;
356  int64_t Imm = Src.second;
357  std::vector<MachineOperand *>::iterator It =
358  find_if(Lits, [&](MachineOperand *val) {
359  return val->isImm() && (val->getImm() == Imm);
360  });
361 
362  // Get corresponding Operand
363  MachineOperand &Operand = MI.getOperand(
364  TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
365 
366  if (It != Lits.end()) {
367  // Reuse existing literal reg
368  unsigned Index = It - Lits.begin();
369  Src.first->setReg(LiteralRegs[Index]);
370  } else {
371  // Allocate new literal reg
372  assert(Lits.size() < 4 && "Too many literals in Instruction Group");
373  Src.first->setReg(LiteralRegs[Lits.size()]);
374  Lits.push_back(&Operand);
375  }
376  }
377  }
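// Example: if an instruction group uses the immediates 42, 7 and 42 again, the
// first 42 is assigned ALU_LITERAL_X, 7 gets ALU_LITERAL_Y, and the second 42
// reuses ALU_LITERAL_X; the assert above limits a group to four distinct
// literal values.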
378 
379  MachineBasicBlock::iterator insertLiterals(
380  MachineBasicBlock::iterator InsertPos,
381  const std::vector<unsigned> &Literals) const {
382  MachineBasicBlock *MBB = InsertPos->getParent();
383  for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
384  unsigned LiteralPair0 = Literals[i];
385  unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
386  InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
387  TII->get(AMDGPU::LITERALS))
388  .addImm(LiteralPair0)
389  .addImm(LiteralPair1);
390  }
391  return InsertPos;
392  }
393 
394  ClauseFile
395  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
396  const {
397  MachineInstr &ClauseHead = *I;
398  std::vector<MachineInstr *> ClauseContent;
399  I++;
400  for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
401  if (IsTrivialInst(*I)) {
402  ++I;
403  continue;
404  }
405  if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
406  break;
407  std::vector<MachineOperand *>Literals;
408  if (I->isBundle()) {
409  MachineInstr &DeleteMI = *I;
410  MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
411  while (++BI != E && BI->isBundledWithPred()) {
412  BI->unbundleFromPred();
413  for (MachineOperand &MO : BI->operands()) {
414  if (MO.isReg() && MO.isInternalRead())
415  MO.setIsInternalRead(false);
416  }
417  getLiteral(*BI, Literals);
418  ClauseContent.push_back(&*BI);
419  }
420  I = BI;
421  DeleteMI.eraseFromParent();
422  } else {
423  getLiteral(*I, Literals);
424  ClauseContent.push_back(&*I);
425  I++;
426  }
427  for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
428  MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
429  TII->get(AMDGPU::LITERALS));
430  if (Literals[i]->isImm()) {
431  MILit.addImm(Literals[i]->getImm());
432  } else {
433  MILit.addGlobalAddress(Literals[i]->getGlobal(),
434  Literals[i]->getOffset());
435  }
436  if (i + 1 < e) {
437  if (Literals[i + 1]->isImm()) {
438  MILit.addImm(Literals[i + 1]->getImm());
439  } else {
440  MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
441  Literals[i + 1]->getOffset());
442  }
443  } else
444  MILit.addImm(0);
445  ClauseContent.push_back(MILit);
446  }
447  }
448  assert(ClauseContent.size() < 128 && "ALU clause is too big");
449  ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
450  return ClauseFile(&ClauseHead, std::move(ClauseContent));
451  }
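// MakeALUClause unbundles each instruction group, appends LITERALS pseudo
// instructions holding the group's immediates (packed two per LITERALS), and
// writes the clause size minus one into operand 7 of the clause head; the
// assert keeps a clause below 128 instructions.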
452 
453  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
454  const DebugLoc &DL, ClauseFile &Clause,
455  unsigned &CfCount) {
456  CounterPropagateAddr(*Clause.first, CfCount);
457  MachineBasicBlock *BB = Clause.first->getParent();
458  BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount);
459  for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
460  BB->splice(InsertPos, BB, Clause.second[i]);
461  }
462  CfCount += 2 * Clause.second.size();
463  }
464 
465  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
466  ClauseFile &Clause, unsigned &CfCount) {
467  Clause.first->getOperand(0).setImm(0);
468  CounterPropagateAddr(*Clause.first, CfCount);
469  MachineBasicBlock *BB = Clause.first->getParent();
470  BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount);
471  for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
472  BB->splice(InsertPos, BB, Clause.second[i]);
473  }
474  CfCount += Clause.second.size();
475  }
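// Note the CF counter bookkeeping: every instruction spliced by
// EmitFetchClause advances CfCount by two slots, whereas each instruction of
// an ALU clause advances it by one.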
476 
477  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
478  MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
479  }
480  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
481  unsigned Addr) const {
482  for (MachineInstr *MI : MIs) {
483  CounterPropagateAddr(*MI, Addr);
484  }
485  }
486 
487 public:
488  R600ControlFlowFinalizer(TargetMachine &tm)
489  : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
490 
491  bool runOnMachineFunction(MachineFunction &MF) override {
492  ST = &MF.getSubtarget<R600Subtarget>();
493  MaxFetchInst = ST->getTexVTXClauseSize();
494  TII = ST->getInstrInfo();
495  TRI = ST->getRegisterInfo();
496 
497  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
498 
499  CFStack CFStack(ST, MF.getFunction()->getCallingConv());
500  for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
501  ++MB) {
502  MachineBasicBlock &MBB = *MB;
503  unsigned CfCount = 0;
504  std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
505  std::vector<MachineInstr * > IfThenElseStack;
506  if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
507  BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
508  getHWInstrDesc(CF_CALL_FS));
509  CfCount++;
510  }
511  std::vector<ClauseFile> FetchClauses, AluClauses;
512  std::vector<MachineInstr *> LastAlu(1);
513  std::vector<MachineInstr *> ToPopAfter;
514 
515  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
516  I != E;) {
517  if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
518  DEBUG(dbgs() << CfCount << ":"; I->dump(););
519  FetchClauses.push_back(MakeFetchClause(MBB, I));
520  CfCount++;
521  LastAlu.back() = nullptr;
522  continue;
523  }
524 
525  MachineBasicBlock::iterator MI = I;
526  if (MI->getOpcode() != AMDGPU::ENDIF)
527  LastAlu.back() = nullptr;
528  if (MI->getOpcode() == AMDGPU::CF_ALU)
529  LastAlu.back() = &*MI;
530  I++;
531  bool RequiresWorkAround =
532  CFStack.requiresWorkAroundForInst(MI->getOpcode());
533  switch (MI->getOpcode()) {
534  case AMDGPU::CF_ALU_PUSH_BEFORE:
535  if (RequiresWorkAround) {
536  DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
537  BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
538  .addImm(CfCount + 1)
539  .addImm(1);
540  MI->setDesc(TII->get(AMDGPU::CF_ALU));
541  CfCount++;
542  CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
543  } else
544  CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
545 
546  case AMDGPU::CF_ALU:
547  I = MI;
548  AluClauses.push_back(MakeALUClause(MBB, I));
549  DEBUG(dbgs() << CfCount << ":"; MI->dump(););
550  CfCount++;
551  break;
552  case AMDGPU::WHILELOOP: {
553  CFStack.pushLoop();
554  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
555  getHWInstrDesc(CF_WHILE_LOOP))
556  .addImm(1);
557  std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
558  std::set<MachineInstr *>());
559  Pair.second.insert(MIb);
560  LoopStack.push_back(std::move(Pair));
561  MI->eraseFromParent();
562  CfCount++;
563  break;
564  }
565  case AMDGPU::ENDLOOP: {
566  CFStack.popLoop();
567  std::pair<unsigned, std::set<MachineInstr *> > Pair =
568  std::move(LoopStack.back());
569  LoopStack.pop_back();
570  CounterPropagateAddr(Pair.second, CfCount);
571  BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
572  .addImm(Pair.first + 1);
573  MI->eraseFromParent();
574  CfCount++;
575  break;
576  }
577  case AMDGPU::IF_PREDICATE_SET: {
578  LastAlu.push_back(nullptr);
579  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
580  getHWInstrDesc(CF_JUMP))
581  .addImm(0)
582  .addImm(0);
583  IfThenElseStack.push_back(MIb);
584  DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
585  MI->eraseFromParent();
586  CfCount++;
587  break;
588  }
589  case AMDGPU::ELSE: {
590  MachineInstr * JumpInst = IfThenElseStack.back();
591  IfThenElseStack.pop_back();
592  CounterPropagateAddr(*JumpInst, CfCount);
593  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
594  getHWInstrDesc(CF_ELSE))
595  .addImm(0)
596  .addImm(0);
597  DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
598  IfThenElseStack.push_back(MIb);
599  MI->eraseFromParent();
600  CfCount++;
601  break;
602  }
603  case AMDGPU::ENDIF: {
604  CFStack.popBranch();
605  if (LastAlu.back()) {
606  ToPopAfter.push_back(LastAlu.back());
607  } else {
608  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
609  getHWInstrDesc(CF_POP))
610  .addImm(CfCount + 1)
611  .addImm(1);
612  (void)MIb;
613  DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
614  CfCount++;
615  }
616 
617  MachineInstr *IfOrElseInst = IfThenElseStack.back();
618  IfThenElseStack.pop_back();
619  CounterPropagateAddr(*IfOrElseInst, CfCount);
620  IfOrElseInst->getOperand(1).setImm(1);
621  LastAlu.pop_back();
622  MI->eraseFromParent();
623  break;
624  }
625  case AMDGPU::BREAK: {
626  CfCount ++;
627  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
628  getHWInstrDesc(CF_LOOP_BREAK))
629  .addImm(0);
630  LoopStack.back().second.insert(MIb);
631  MI->eraseFromParent();
632  break;
633  }
634  case AMDGPU::CONTINUE: {
635  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
636  getHWInstrDesc(CF_LOOP_CONTINUE))
637  .addImm(0);
638  LoopStack.back().second.insert(MIb);
639  MI->eraseFromParent();
640  CfCount++;
641  break;
642  }
643  case AMDGPU::RETURN: {
644  DebugLoc DL = MBB.findDebugLoc(MI);
645  BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
646  CfCount++;
647  if (CfCount % 2) {
648  BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD));
649  CfCount++;
650  }
651  MI->eraseFromParent();
652  for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
653  EmitFetchClause(I, DL, FetchClauses[i], CfCount);
654  for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
655  EmitALUClause(I, DL, AluClauses[i], CfCount);
656  break;
657  }
658  default:
659  if (TII->isExport(MI->getOpcode())) {
660  DEBUG(dbgs() << CfCount << ":"; MI->dump(););
661  CfCount++;
662  }
663  break;
664  }
665  }
666  for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
667  MachineInstr *Alu = ToPopAfter[i];
668  BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
669  TII->get(AMDGPU::CF_ALU_POP_AFTER))
670  .addImm(Alu->getOperand(0).getImm())
671  .addImm(Alu->getOperand(1).getImm())
672  .addImm(Alu->getOperand(2).getImm())
673  .addImm(Alu->getOperand(3).getImm())
674  .addImm(Alu->getOperand(4).getImm())
675  .addImm(Alu->getOperand(5).getImm())
676  .addImm(Alu->getOperand(6).getImm())
677  .addImm(Alu->getOperand(7).getImm())
678  .addImm(Alu->getOperand(8).getImm());
679  Alu->eraseFromParent();
680  }
681  MFI->CFStackSize = CFStack.MaxStackSize;
682  }
683 
684  return false;
685  }
686 
687  StringRef getPassName() const override {
688  return "R600 Control Flow Finalizer Pass";
689  }
690 };
691 
692 char R600ControlFlowFinalizer::ID = 0;
693 
694 } // end anonymous namespace
695 
696 
697 FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
698  return new R600ControlFlowFinalizer(TM);
699 }