Line data Source code
1 : //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : /// This pass turns all control flow pseudo instructions into native ones,
12 : /// computing their addresses on the fly; it also sets the STACK_SIZE info.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "AMDGPU.h"
17 : #include "AMDGPUSubtarget.h"
18 : #include "R600Defines.h"
19 : #include "R600InstrInfo.h"
20 : #include "R600MachineFunctionInfo.h"
21 : #include "R600RegisterInfo.h"
22 : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23 : #include "llvm/ADT/STLExtras.h"
24 : #include "llvm/ADT/SmallVector.h"
25 : #include "llvm/ADT/StringRef.h"
26 : #include "llvm/CodeGen/MachineBasicBlock.h"
27 : #include "llvm/CodeGen/MachineFunction.h"
28 : #include "llvm/CodeGen/MachineFunctionPass.h"
29 : #include "llvm/CodeGen/MachineInstr.h"
30 : #include "llvm/CodeGen/MachineInstrBuilder.h"
31 : #include "llvm/CodeGen/MachineOperand.h"
32 : #include "llvm/IR/CallingConv.h"
33 : #include "llvm/IR/DebugLoc.h"
34 : #include "llvm/IR/Function.h"
35 : #include "llvm/Pass.h"
36 : #include "llvm/Support/Compiler.h"
37 : #include "llvm/Support/Debug.h"
38 : #include "llvm/Support/MathExtras.h"
39 : #include "llvm/Support/raw_ostream.h"
40 : #include <algorithm>
41 : #include <cassert>
42 : #include <cstdint>
43 : #include <set>
44 : #include <utility>
45 : #include <vector>
46 :
47 : using namespace llvm;
48 :
49 : #define DEBUG_TYPE "r600cf"
50 :
51 : namespace {
52 :
53 : struct CFStack {
54 : enum StackItem {
55 : ENTRY = 0,
56 : SUB_ENTRY = 1,
57 : FIRST_NON_WQM_PUSH = 2,
58 : FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
59 : };
60 :
61 : const R600Subtarget *ST;
62 : std::vector<StackItem> BranchStack;
63 : std::vector<StackItem> LoopStack;
64 : unsigned MaxStackSize;
65 : unsigned CurrentEntries = 0;
66 : unsigned CurrentSubEntries = 0;
67 :
68 2297 : CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
69 : // We need to reserve a stack entry for CALL_FS in vertex shaders.
70 4579 : MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
71 :
72 : unsigned getLoopDepth();
73 : bool branchStackContains(CFStack::StackItem);
74 : bool requiresWorkAroundForInst(unsigned Opcode);
75 : unsigned getSubEntrySize(CFStack::StackItem Item);
76 : void updateMaxStackSize();
77 : void pushBranch(unsigned Opcode, bool isWQM = false);
78 : void pushLoop();
79 : void popBranch();
80 : void popLoop();
81 : };
82 :
83 : unsigned CFStack::getLoopDepth() {
84 8 : return LoopStack.size();
85 : }
86 :
87 : bool CFStack::branchStackContains(CFStack::StackItem Item) {
88 : for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
89 54 : E = BranchStack.end(); I != E; ++I) {
90 10 : if (*I == Item)
91 : return true;
92 : }
93 : return false;
94 : }
95 :
96 9393 : bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
97 9393 : if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
98 : getLoopDepth() > 1)
99 : return true;
100 :
101 9392 : if (!ST->hasCFAluBug())
102 : return false;
103 :
104 : switch(Opcode) {
105 : default: return false;
106 45 : case R600::CF_ALU_PUSH_BEFORE:
107 : case R600::CF_ALU_ELSE_AFTER:
108 : case R600::CF_ALU_BREAK:
109 : case R600::CF_ALU_CONTINUE:
110 45 : if (CurrentSubEntries == 0)
111 : return false;
112 10 : if (ST->getWavefrontSize() == 64) {
113 : // We are being conservative here. We only require this work-around if
114 : // CurrentSubEntries > 3 &&
115 : // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
116 : //
117 : // We have to be conservative, because we don't know for certain that
118 : // our stack allocation algorithm for Evergreen/NI is correct. Applying this
119 : // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
120 : // resources without any problems.
121 8 : return CurrentSubEntries > 3;
122 : } else {
123 : assert(ST->getWavefrontSize() == 32);
124 : // We are being conservative here. We only require the work-around if
125 : // CurrentSubEntries > 7 &&
126 : // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
127 : // See the comment on the wavefront size == 64 case for why we are
128 : // being conservative.
129 2 : return CurrentSubEntries > 7;
130 : }
131 : }
132 : }
133 :
134 0 : unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
135 124 : switch(Item) {
136 : default:
137 : return 0;
138 88 : case CFStack::FIRST_NON_WQM_PUSH:
139 : assert(!ST->hasCaymanISA());
140 88 : if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
141 : // +1 For the push operation.
142 : // +2 Extra space required.
143 0 : return 3;
144 : } else {
145 : // Some documentation says that this is not necessary on Evergreen,
146 : // but experimentation has show that we need to allocate 1 extra
147 : // sub-entry for the first non-WQM push.
148 : // +1 For the push operation.
149 : // +1 Extra space required.
150 0 : return 2;
151 : }
152 0 : case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
153 : assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
154 : // +1 For the push operation.
155 : // +1 Extra space required.
156 0 : return 2;
157 36 : case CFStack::SUB_ENTRY:
158 0 : return 1;
159 : }
160 : }
161 :
162 : void CFStack::updateMaxStackSize() {
163 : unsigned CurrentStackSize =
164 62 : CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
165 80 : MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
166 : }
167 :
168 62 : void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
169 62 : CFStack::StackItem Item = CFStack::ENTRY;
170 62 : switch(Opcode) {
171 62 : case R600::CF_PUSH_EG:
172 : case R600::CF_ALU_PUSH_BEFORE:
173 62 : if (!isWQM) {
174 116 : if (!ST->hasCaymanISA() &&
175 : !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
176 44 : Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
177 : // See comment in
178 : // CFStack::getSubEntrySize()
179 31 : else if (CurrentEntries > 0 &&
180 13 : ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
181 18 : !ST->hasCaymanISA() &&
182 : !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
183 0 : Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
184 : else
185 18 : Item = CFStack::SUB_ENTRY;
186 : } else
187 : Item = CFStack::ENTRY;
188 : break;
189 : }
190 62 : BranchStack.push_back(Item);
191 62 : if (Item == CFStack::ENTRY)
192 0 : CurrentEntries++;
193 : else
194 62 : CurrentSubEntries += getSubEntrySize(Item);
195 : updateMaxStackSize();
196 62 : }
197 :
198 18 : void CFStack::pushLoop() {
199 18 : LoopStack.push_back(CFStack::ENTRY);
200 18 : CurrentEntries++;
201 : updateMaxStackSize();
202 18 : }
203 :
204 62 : void CFStack::popBranch() {
205 62 : CFStack::StackItem Top = BranchStack.back();
206 62 : if (Top == CFStack::ENTRY)
207 0 : CurrentEntries--;
208 : else
209 62 : CurrentSubEntries-= getSubEntrySize(Top);
210 : BranchStack.pop_back();
211 62 : }
212 :
213 : void CFStack::popLoop() {
214 18 : CurrentEntries--;
215 : LoopStack.pop_back();
216 : }
217 :
218 : class R600ControlFlowFinalizer : public MachineFunctionPass {
219 : private:
220 : using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
221 :
// Generation-independent identifiers for the native control flow
// instructions this pass emits.  getHWInstrDesc() translates each one into
// the concrete opcode for the current subtarget.
222 : enum ControlFlowInstruction {
// Fetch clause whose instructions use the texture cache.
223 : CF_TC,
// Fetch clause whose instructions use the vertex cache.
224 : CF_VC,
// Emitted at the start of a block for the AMDGPU_VS calling convention.
225 : CF_CALL_FS,
// Loop open / close / break / continue.
226 : CF_WHILE_LOOP,
227 : CF_END_LOOP,
228 : CF_LOOP_BREAK,
229 : CF_LOOP_CONTINUE,
// Emitted for IF_PREDICATE_SET, ELSE and ENDIF respectively.
230 : CF_JUMP,
231 : CF_ELSE,
232 : CF_POP,
// Emitted for RETURN.
233 : CF_END
234 : };
235 :
236 : const R600InstrInfo *TII = nullptr;
237 : const R600RegisterInfo *TRI = nullptr;
238 : unsigned MaxFetchInst;
239 : const R600Subtarget *ST = nullptr;
240 :
241 0 : bool IsTrivialInst(MachineInstr &MI) const {
242 67184 : switch (MI.getOpcode()) {
243 : case R600::KILL:
244 : case R600::RETURN:
245 : return true;
246 0 : default:
247 0 : return false;
248 : }
249 : }
250 :
251 0 : const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
252 : unsigned Opcode = 0;
253 2180 : bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
254 0 : switch (CFI) {
255 0 : case CF_TC:
256 0 : Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
257 : break;
258 0 : case CF_VC:
259 0 : Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
260 : break;
261 0 : case CF_CALL_FS:
262 0 : Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
263 : break;
264 0 : case CF_WHILE_LOOP:
265 0 : Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
266 : break;
267 0 : case CF_END_LOOP:
268 0 : Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
269 : break;
270 0 : case CF_LOOP_BREAK:
271 0 : Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
272 : break;
273 0 : case CF_LOOP_CONTINUE:
274 0 : Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
275 : break;
276 0 : case CF_JUMP:
277 0 : Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
278 : break;
279 0 : case CF_ELSE:
280 0 : Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
281 : break;
282 0 : case CF_POP:
283 0 : Opcode = isEg ? R600::POP_EG : R600::POP_R600;
284 : break;
285 0 : case CF_END:
286 0 : if (ST->hasCaymanISA()) {
287 : Opcode = R600::CF_END_CM;
288 : break;
289 : }
290 1704 : Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
291 : break;
292 : }
293 : assert (Opcode && "No opcode selected");
294 2180 : return TII->get(Opcode);
295 : }
296 :
297 0 : bool isCompatibleWithClause(const MachineInstr &MI,
298 : std::set<unsigned> &DstRegs) const {
299 : unsigned DstMI, SrcMI;
300 0 : for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
301 0 : E = MI.operands_end();
302 0 : I != E; ++I) {
303 : const MachineOperand &MO = *I;
304 0 : if (!MO.isReg())
305 0 : continue;
306 0 : if (MO.isDef()) {
307 0 : unsigned Reg = MO.getReg();
308 0 : if (R600::R600_Reg128RegClass.contains(Reg))
309 0 : DstMI = Reg;
310 : else
311 0 : DstMI = TRI->getMatchingSuperReg(Reg,
312 : AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
313 : &R600::R600_Reg128RegClass);
314 : }
315 0 : if (MO.isUse()) {
316 0 : unsigned Reg = MO.getReg();
317 0 : if (R600::R600_Reg128RegClass.contains(Reg))
318 : SrcMI = Reg;
319 : else
320 0 : SrcMI = TRI->getMatchingSuperReg(Reg,
321 : AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
322 : &R600::R600_Reg128RegClass);
323 : }
324 : }
325 0 : if ((DstRegs.find(SrcMI) == DstRegs.end())) {
326 : DstRegs.insert(DstMI);
327 0 : return true;
328 : } else
329 0 : return false;
330 : }
331 :
332 : ClauseFile
333 1317 : MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
334 : const {
335 1317 : MachineBasicBlock::iterator ClauseHead = I;
336 : std::vector<MachineInstr *> ClauseContent;
337 : unsigned AluInstCount = 0;
338 1317 : bool IsTex = TII->usesTextureCache(*ClauseHead);
339 : std::set<unsigned> DstRegs;
340 3300 : for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
341 : if (IsTrivialInst(*I))
342 : continue;
343 3300 : if (AluInstCount >= MaxFetchInst)
344 : break;
345 3300 : if ((IsTex && !TII->usesTextureCache(*I)) ||
346 0 : (!IsTex && !TII->usesVertexCache(*I)))
347 : break;
348 2000 : if (!isCompatibleWithClause(*I, DstRegs))
349 : break;
350 1983 : AluInstCount ++;
351 1983 : ClauseContent.push_back(&*I);
352 : }
353 1317 : MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
354 1317 : getHWInstrDesc(IsTex?CF_TC:CF_VC))
355 : .addImm(0) // ADDR
356 1317 : .addImm(AluInstCount - 1); // COUNT
357 1317 : return ClauseFile(MIb, std::move(ClauseContent));
358 : }
359 :
360 0 : void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
361 : static const unsigned LiteralRegs[] = {
362 : R600::ALU_LITERAL_X,
363 : R600::ALU_LITERAL_Y,
364 : R600::ALU_LITERAL_Z,
365 : R600::ALU_LITERAL_W
366 : };
367 : const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
368 0 : TII->getSrcs(MI);
369 0 : for (const auto &Src:Srcs) {
370 0 : if (Src.first->getReg() != R600::ALU_LITERAL_X)
371 0 : continue;
372 0 : int64_t Imm = Src.second;
373 : std::vector<MachineOperand *>::iterator It =
374 : llvm::find_if(Lits, [&](MachineOperand *val) {
375 0 : return val->isImm() && (val->getImm() == Imm);
376 : });
377 :
378 : // Get corresponding Operand
379 : MachineOperand &Operand = MI.getOperand(
380 0 : TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
381 :
382 0 : if (It != Lits.end()) {
383 : // Reuse existing literal reg
384 0 : unsigned Index = It - Lits.begin();
385 0 : Src.first->setReg(LiteralRegs[Index]);
386 : } else {
387 : // Allocate new literal reg
388 : assert(Lits.size() < 4 && "Too many literals in Instruction Group");
389 0 : Src.first->setReg(LiteralRegs[Lits.size()]);
390 0 : Lits.push_back(&Operand);
391 : }
392 : }
393 0 : }
394 :
395 : MachineBasicBlock::iterator insertLiterals(
396 : MachineBasicBlock::iterator InsertPos,
397 : const std::vector<unsigned> &Literals) const {
398 : MachineBasicBlock *MBB = InsertPos->getParent();
399 : for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
400 : unsigned LiteralPair0 = Literals[i];
401 : unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
402 : InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
403 : TII->get(R600::LITERALS))
404 : .addImm(LiteralPair0)
405 : .addImm(LiteralPair1);
406 : }
407 : return InsertPos;
408 : }
409 :
410 : ClauseFile
411 3844 : MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
412 : const {
413 : MachineInstr &ClauseHead = *I;
414 : std::vector<MachineInstr *> ClauseContent;
415 : I++;
416 30579 : for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
417 : if (IsTrivialInst(*I)) {
418 : ++I;
419 287 : continue;
420 : }
421 30005 : if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
422 : break;
423 : std::vector<MachineOperand *>Literals;
424 26448 : if (I->isBundle()) {
425 : MachineInstr &DeleteMI = *I;
426 : MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
427 51221 : while (++BI != E && BI->isBundledWithPred()) {
428 37488 : BI->unbundleFromPred();
429 766052 : for (MachineOperand &MO : BI->operands()) {
430 728564 : if (MO.isReg() && MO.isInternalRead())
431 : MO.setIsInternalRead(false);
432 : }
433 37488 : getLiteral(*BI, Literals);
434 37488 : ClauseContent.push_back(&*BI);
435 : }
436 13733 : I = BI;
437 13733 : DeleteMI.eraseFromParent();
438 : } else {
439 12715 : getLiteral(*I, Literals);
440 12715 : ClauseContent.push_back(&*I);
441 : I++;
442 : }
443 66697 : for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
444 : MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
445 27602 : TII->get(R600::LITERALS));
446 41403 : if (Literals[i]->isImm()) {
447 13786 : MILit.addImm(Literals[i]->getImm());
448 : } else {
449 : MILit.addGlobalAddress(Literals[i]->getGlobal(),
450 30 : Literals[i]->getOffset());
451 : }
452 13801 : if (i + 1 < e) {
453 16257 : if (Literals[i + 1]->isImm()) {
454 5419 : MILit.addImm(Literals[i + 1]->getImm());
455 : } else {
456 : MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
457 0 : Literals[i + 1]->getOffset());
458 : }
459 : } else
460 : MILit.addImm(0);
461 13801 : ClauseContent.push_back(MILit);
462 : }
463 : }
464 : assert(ClauseContent.size() < 128 && "ALU clause is too big");
465 7688 : ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
466 3844 : return ClauseFile(&ClauseHead, std::move(ClauseContent));
467 : }
468 :
469 0 : void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
470 : const DebugLoc &DL, ClauseFile &Clause,
471 : unsigned &CfCount) {
472 0 : CounterPropagateAddr(*Clause.first, CfCount);
473 0 : MachineBasicBlock *BB = Clause.first->getParent();
474 0 : BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
475 0 : for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
476 0 : BB->splice(InsertPos, BB, Clause.second[i]);
477 : }
478 0 : CfCount += 2 * Clause.second.size();
479 0 : }
480 :
481 0 : void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
482 : ClauseFile &Clause, unsigned &CfCount) {
483 0 : Clause.first->getOperand(0).setImm(0);
484 0 : CounterPropagateAddr(*Clause.first, CfCount);
485 0 : MachineBasicBlock *BB = Clause.first->getParent();
486 0 : BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
487 0 : for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
488 0 : BB->splice(InsertPos, BB, Clause.second[i]);
489 : }
490 0 : CfCount += Clause.second.size();
491 0 : }
492 :
493 0 : void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
494 0 : MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
495 0 : }
496 0 : void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
497 : unsigned Addr) const {
498 0 : for (MachineInstr *MI : MIs) {
499 : CounterPropagateAddr(*MI, Addr);
500 : }
501 0 : }
502 :
503 : public:
504 : static char ID;
505 :
506 282 : R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
507 :
508 2297 : bool runOnMachineFunction(MachineFunction &MF) override {
509 2297 : ST = &MF.getSubtarget<R600Subtarget>();
510 2297 : MaxFetchInst = ST->getTexVTXClauseSize();
511 2297 : TII = ST->getInstrInfo();
512 2297 : TRI = ST->getRegisterInfo();
513 :
514 2297 : R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
515 :
516 6891 : CFStack CFStack(ST, MF.getFunction().getCallingConv());
517 4594 : for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
518 : ++MB) {
519 : MachineBasicBlock &MBB = *MB;
520 2297 : unsigned CfCount = 0;
521 2297 : std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
522 : std::vector<MachineInstr * > IfThenElseStack;
523 4594 : if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
524 15 : BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
525 30 : getHWInstrDesc(CF_CALL_FS));
526 15 : CfCount++;
527 : }
528 2297 : std::vector<ClauseFile> FetchClauses, AluClauses;
529 2297 : std::vector<MachineInstr *> LastAlu(1);
530 : std::vector<MachineInstr *> ToPopAfter;
531 :
532 2297 : for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
533 13007 : I != E;) {
534 10710 : if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
535 : LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
536 1317 : FetchClauses.push_back(MakeFetchClause(MBB, I));
537 1317 : CfCount++;
538 1317 : LastAlu.back() = nullptr;
539 : continue;
540 : }
541 :
542 9393 : MachineBasicBlock::iterator MI = I;
543 18786 : if (MI->getOpcode() != R600::ENDIF)
544 9331 : LastAlu.back() = nullptr;
545 9393 : if (MI->getOpcode() == R600::CF_ALU)
546 3782 : LastAlu.back() = &*MI;
547 : I++;
548 : bool RequiresWorkAround =
549 9393 : CFStack.requiresWorkAroundForInst(MI->getOpcode());
550 9393 : switch (MI->getOpcode()) {
551 62 : case R600::CF_ALU_PUSH_BEFORE:
552 62 : if (RequiresWorkAround) {
553 : LLVM_DEBUG(dbgs()
554 : << "Applying bug work-around for ALU_PUSH_BEFORE\n");
555 1 : BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
556 1 : .addImm(CfCount + 1)
557 : .addImm(1);
558 1 : MI->setDesc(TII->get(R600::CF_ALU));
559 1 : CfCount++;
560 1 : CFStack.pushBranch(R600::CF_PUSH_EG);
561 : } else
562 61 : CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
563 : LLVM_FALLTHROUGH;
564 : case R600::CF_ALU:
565 3844 : I = MI;
566 3844 : AluClauses.push_back(MakeALUClause(MBB, I));
567 : LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
568 3844 : CfCount++;
569 3844 : break;
570 18 : case R600::WHILELOOP: {
571 18 : CFStack.pushLoop();
572 18 : MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
573 36 : getHWInstrDesc(CF_WHILE_LOOP))
574 18 : .addImm(1);
575 : std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
576 18 : std::set<MachineInstr *>());
577 : Pair.second.insert(MIb);
578 : LoopStack.push_back(std::move(Pair));
579 18 : MI->eraseFromParent();
580 18 : CfCount++;
581 : break;
582 : }
583 : case R600::ENDLOOP: {
584 : CFStack.popLoop();
585 : std::pair<unsigned, std::set<MachineInstr *>> Pair =
586 : std::move(LoopStack.back());
587 : LoopStack.pop_back();
588 18 : CounterPropagateAddr(Pair.second, CfCount);
589 54 : BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
590 18 : .addImm(Pair.first + 1);
591 18 : MI->eraseFromParent();
592 18 : CfCount++;
593 : break;
594 : }
595 62 : case R600::IF_PREDICATE_SET: {
596 62 : LastAlu.push_back(nullptr);
597 62 : MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
598 124 : getHWInstrDesc(CF_JUMP))
599 : .addImm(0)
600 62 : .addImm(0);
601 62 : IfThenElseStack.push_back(MIb);
602 : LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
603 62 : MI->eraseFromParent();
604 62 : CfCount++;
605 : break;
606 : }
607 : case R600::ELSE: {
608 3 : MachineInstr * JumpInst = IfThenElseStack.back();
609 : IfThenElseStack.pop_back();
610 3 : CounterPropagateAddr(*JumpInst, CfCount);
611 3 : MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
612 6 : getHWInstrDesc(CF_ELSE))
613 : .addImm(0)
614 3 : .addImm(0);
615 : LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
616 3 : IfThenElseStack.push_back(MIb);
617 3 : MI->eraseFromParent();
618 3 : CfCount++;
619 : break;
620 : }
621 62 : case R600::ENDIF: {
622 62 : CFStack.popBranch();
623 62 : if (LastAlu.back()) {
624 24 : ToPopAfter.push_back(LastAlu.back());
625 : } else {
626 38 : MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
627 76 : getHWInstrDesc(CF_POP))
628 38 : .addImm(CfCount + 1)
629 : .addImm(1);
630 : (void)MIb;
631 : LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
632 38 : CfCount++;
633 : }
634 :
635 62 : MachineInstr *IfOrElseInst = IfThenElseStack.back();
636 : IfThenElseStack.pop_back();
637 62 : CounterPropagateAddr(*IfOrElseInst, CfCount);
638 : IfOrElseInst->getOperand(1).setImm(1);
639 : LastAlu.pop_back();
640 62 : MI->eraseFromParent();
641 62 : break;
642 : }
643 18 : case R600::BREAK: {
644 18 : CfCount ++;
645 18 : MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
646 36 : getHWInstrDesc(CF_LOOP_BREAK))
647 18 : .addImm(0);
648 : LoopStack.back().second.insert(MIb);
649 18 : MI->eraseFromParent();
650 : break;
651 : }
652 0 : case R600::CONTINUE: {
653 0 : MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
654 0 : getHWInstrDesc(CF_LOOP_CONTINUE))
655 0 : .addImm(0);
656 : LoopStack.back().second.insert(MIb);
657 0 : MI->eraseFromParent();
658 0 : CfCount++;
659 : break;
660 : }
661 2008 : case R600::RETURN: {
662 : DebugLoc DL = MBB.findDebugLoc(MI);
663 3712 : BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
664 2008 : CfCount++;
665 2008 : if (CfCount % 2) {
666 3314 : BuildMI(MBB, I, DL, TII->get(R600::PAD));
667 1657 : CfCount++;
668 : }
669 2008 : MI->eraseFromParent();
670 5312 : for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
671 2592 : EmitFetchClause(I, DL, FetchClauses[i], CfCount);
672 7473 : for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
673 6914 : EmitALUClause(I, DL, AluClauses[i], CfCount);
674 : break;
675 : }
676 3360 : default:
677 3360 : if (TII->isExport(MI->getOpcode())) {
678 : LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
679 3343 : CfCount++;
680 : }
681 : break;
682 : }
683 : }
684 4618 : for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
685 24 : MachineInstr *Alu = ToPopAfter[i];
686 24 : BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
687 24 : TII->get(R600::CF_ALU_POP_AFTER))
688 24 : .addImm(Alu->getOperand(0).getImm())
689 24 : .addImm(Alu->getOperand(1).getImm())
690 24 : .addImm(Alu->getOperand(2).getImm())
691 24 : .addImm(Alu->getOperand(3).getImm())
692 24 : .addImm(Alu->getOperand(4).getImm())
693 24 : .addImm(Alu->getOperand(5).getImm())
694 24 : .addImm(Alu->getOperand(6).getImm())
695 24 : .addImm(Alu->getOperand(7).getImm())
696 24 : .addImm(Alu->getOperand(8).getImm());
697 24 : Alu->eraseFromParent();
698 : }
699 2297 : MFI->CFStackSize = CFStack.MaxStackSize;
700 : }
701 :
702 2297 : return false;
703 : }
704 :
705 282 : StringRef getPassName() const override {
706 282 : return "R600 Control Flow Finalizer Pass";
707 : }
708 : };
709 :
710 : } // end anonymous namespace
711 :
712 85105 : INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
713 : "R600 Control Flow Finalizer", false, false)
714 199024 : INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
715 : "R600 Control Flow Finalizer", false, false)
716 :
717 : char R600ControlFlowFinalizer::ID = 0;
718 :
719 : char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
720 :
721 282 : FunctionPass *llvm::createR600ControlFlowFinalizer() {
722 282 : return new R600ControlFlowFinalizer();
723 : }
|