24 #define DEBUG_TYPE "r600cf"
// Interior of CFStack (the enclosing class header is not part of this listing).
//
// Two of the StackItem enumerators, with explicit values 2 and 3.
// pushBranch() assigns one of these, or SUB_ENTRY, to each pushed branch.
32 FIRST_NON_WQM_PUSH = 2,
33 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
// Items appended by pushBranch() and removed by popBranch().
37 std::vector<StackItem> BranchStack;
// getLoopDepth() reports this vector's size; popLoop() removes its last item.
38 std::vector<StackItem> LoopStack;
// Running maximum maintained by updateMaxStackSize().
39 unsigned MaxStackSize;
// Entry counter; updateMaxStackSize() adds it to the rounded-up sub-entry count.
40 unsigned CurrentEntries = 0;
// Sub-entry counter; changed by getSubEntrySize(Item) on branch push and pop.
41 unsigned CurrentSubEntries = 0;
// Number of items currently in LoopStack.
47 unsigned getLoopDepth();
// Query on a StackItem; the definition body is not visible in this listing.
48 bool branchStackContains(CFStack::StackItem);
// True when the given opcode needs the CF ALU work-around in the current
// stack state.
49 bool requiresWorkAroundForInst(
unsigned Opcode);
// Number of sub-entries accounted for the given StackItem.
50 unsigned getSubEntrySize(CFStack::StackItem Item);
// Raises MaxStackSize if the current usage exceeds it.
51 void updateMaxStackSize();
// Records a new branch for the given opcode; isWQM defaults to false.
52 void pushBranch(
unsigned Opcode,
bool isWQM =
false);
// Returns the number of items currently held in LoopStack.
58 unsigned CFStack::getLoopDepth() {
59 return LoopStack.size();
// Takes a StackItem and returns a bool. The body is not visible in this
// listing; presumably it reports whether Item occurs in BranchStack -- confirm
// against the full source. pushBranch() negates the result to detect the
// first push of a given item kind.
62 bool CFStack::branchStackContains(CFStack::StackItem Item) {
// Decides whether the instruction with the given Opcode needs the CF ALU
// work-around, using subtarget queries (hasCaymanISA, hasCFAluBug,
// getWavefrontSize) and the current sub-entry count.
//
// Visible behavior:
//  - Opcodes other than CF_ALU_PUSH_BEFORE, CF_ALU_ELSE_AFTER, CF_ALU_BREAK and
//    CF_ALU_CONTINUE hit the `default` label and return false.
//  - With a wavefront size of 64 the result is `CurrentSubEntries > 3`.
//  - Otherwise the wavefront size is asserted to be 32 and the result is
//    `CurrentSubEntries > 7`.
// The statements guarded by the first three `if`s (the Cayman special case,
// the !hasCFAluBug() check and the CurrentSubEntries == 0 check) are not
// visible in this listing.
66 bool CFStack::requiresWorkAroundForInst(
unsigned Opcode) {
67 if (Opcode == R600::CF_ALU_PUSH_BEFORE &&
ST->hasCaymanISA() &&
71 if (!
ST->hasCFAluBug())
75 default:
return false;
76 case R600::CF_ALU_PUSH_BEFORE:
77 case R600::CF_ALU_ELSE_AFTER:
78 case R600::CF_ALU_BREAK:
79 case R600::CF_ALU_CONTINUE:
80 if (CurrentSubEntries == 0)
82 if (
ST->getWavefrontSize() == 64) {
91 return CurrentSubEntries > 3;
93 assert(
ST->getWavefrontSize() == 32);
99 return CurrentSubEntries > 7;
// Maps a StackItem to an unsigned sub-entry count. pushBranch() adds the
// result to CurrentSubEntries and popBranch() subtracts it again.
// Only three case labels are visible in this listing; the value returned for
// each item kind is not shown.
104 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
108 case CFStack::FIRST_NON_WQM_PUSH:
122 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
127 case CFStack::SUB_ENTRY:
// Recomputes the current stack usage and keeps the largest value seen so far
// in MaxStackSize.
132 void CFStack::updateMaxStackSize() {
// Usage = entries plus sub-entries divided by four, rounded up.
133 unsigned CurrentStackSize = CurrentEntries +
divideCeil(CurrentSubEntries, 4);
134 MaxStackSize =
std::max(CurrentStackSize, MaxStackSize);
// Records a new branch for the given opcode.
//
// For CF_PUSH_EG and CF_ALU_PUSH_BEFORE the visible code classifies the push:
//  - FIRST_NON_WQM_PUSH when the subtarget is not Cayman and BranchStack has
//    no such item yet;
//  - FIRST_NON_WQM_PUSH_W_FULL_ENTRY when CurrentEntries > 0, the subtarget is
//    not Cayman and BranchStack has no such item yet (one condition of this
//    test is not visible in this listing);
//  - SUB_ENTRY is also assigned, presumably as the fallback -- confirm.
// The chosen Item is appended to BranchStack, its sub-entry size is added to
// CurrentSubEntries, and MaxStackSize is refreshed.
// How isWQM influences the choice is not visible in this listing.
137 void CFStack::pushBranch(
unsigned Opcode,
bool isWQM) {
140 case R600::CF_PUSH_EG:
141 case R600::CF_ALU_PUSH_BEFORE:
143 if (!
ST->hasCaymanISA() &&
144 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
145 Item = CFStack::FIRST_NON_WQM_PUSH;
148 else if (CurrentEntries > 0 &&
150 !
ST->hasCaymanISA() &&
151 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
152 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
154 Item = CFStack::SUB_ENTRY;
159 BranchStack.push_back(Item);
163 CurrentSubEntries += getSubEntrySize(Item);
164 updateMaxStackSize();
// Loop counterpart of pushBranch(). The only visible statement refreshes
// MaxStackSize; the push itself is not shown in this listing (presumably it
// appends to LoopStack and updates an entry counter -- confirm).
167 void CFStack::pushLoop() {
170 updateMaxStackSize();
// Removes the most recently pushed branch: reads the top item of BranchStack,
// subtracts its sub-entry size from CurrentSubEntries, then pops it.
// The visible code does not check BranchStack for emptiness before back().
173 void CFStack::popBranch() {
174 CFStack::StackItem Top = BranchStack.back();
178 CurrentSubEntries-= getSubEntrySize(Top);
179 BranchStack.pop_back();
// Removes the most recently pushed loop item from LoopStack. Any counter
// update on the omitted line between these two is not visible in this listing.
182 void CFStack::popLoop() {
184 LoopStack.pop_back();
// A clause as handled by this pass: `first` is a single MachineInstr pointer
// (the clause-building code passes the clause head instruction there) and
// `second` is the list of instructions making up the clause content.
189 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
191 enum ControlFlowInstruction {
// Upper bound on instructions collected into one fetch clause; assigned from
// ST->getTexVTXClauseSize() and compared against the running count while a
// fetch clause is built.
207 unsigned MaxFetchInst;
211 switch (
MI.getOpcode()) {
// Translates a ControlFlowInstruction selector into the MCInstrDesc of the
// concrete hardware opcode, obtained through TII->get().
//
// Most selectors choose between an `_EG` and an `_R600` opcode depending on
// isEg. The end-of-program case uses CF_END_CM when the subtarget has the
// Cayman ISA and the EG/R600 pair otherwise. An assertion checks that an
// opcode was selected before the lookup.
// Most case labels are omitted from this listing; only CF_LOOP_CONTINUE is
// visible.
220 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI)
const {
225 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
228 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
231 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
234 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
237 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
240 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
242 case CF_LOOP_CONTINUE:
243 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
246 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
249 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
252 Opcode = isEg ? R600::POP_EG : R600::POP_R600;
255 if (
ST->hasCaymanISA()) {
256 Opcode = R600::CF_END_CM;
259 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
262 assert (Opcode &&
"No opcode selected");
263 return TII->get(Opcode);
// Tail of a const member function whose first signature line is omitted from
// this listing; presumably isCompatibleWithClause, which the fetch-clause
// builder calls with (*I, DstRegs) -- confirm.
//
// Visible behavior: walks the operands of MI up to MI.operands_end(), works
// with registers relative to R600_Reg128RegClass to obtain DstMI and SrcMI,
// and, when SrcMI is not already in DstRegs, records DstMI in DstRegs.
// How DstMI/SrcMI are computed and what is returned is not visible here.
267 std::set<unsigned> &DstRegs)
const {
268 unsigned DstMI, SrcMI;
270 E =
MI.operands_end();
282 &R600::R600_Reg128RegClass);
291 &R600::R600_Reg128RegClass);
294 if ((DstRegs.find(SrcMI) == DstRegs.end())) {
295 DstRegs.insert(DstMI);
// Body fragment of the fetch-clause builder (function header omitted from this
// listing; presumably MakeFetchClause, called from the main loop -- confirm).
//
// Visible behavior: starting from ClauseHead, classifies the clause as texture
// (IsTex) or not via TII->usesTextureCache, then scans instructions applying
// these checks in order: trivial instruction, count limit (MaxFetchInst),
// cache kind matching the clause kind, and isCompatibleWithClause against the
// destination registers seen so far. Accepted instructions are appended to
// ClauseContent. A CF_TC or CF_VC instruction is then built with the immediate
// `AluInstCount - 1`, and the result pairs that instruction with the content.
// The action taken when a check fires, and where AluInstCount is incremented,
// are not visible in this listing.
305 std::vector<MachineInstr *> ClauseContent;
306 unsigned AluInstCount = 0;
307 bool IsTex =
TII->usesTextureCache(*ClauseHead);
308 std::set<unsigned> DstRegs;
310 if (IsTrivialInst(*
I))
312 if (AluInstCount >= MaxFetchInst)
314 if ((IsTex && !
TII->usesTextureCache(*
I)) ||
315 (!IsTex && !
TII->usesVertexCache(*
I)))
317 if (!isCompatibleWithClause(*
I, DstRegs))
320 ClauseContent.push_back(&*
I);
323 getHWInstrDesc(IsTex?CF_TC:CF_VC))
325 .
addImm(AluInstCount - 1);
326 return ClauseFile(MIb,
std::move(ClauseContent));
// Assigns a literal register to each literal source operand of MI, sharing
// registers between equal immediates, and accumulates the distinct literal
// operands in Lits.
//
// Visible behavior, for each (operand, value) pair in Srcs:
//  - the register is compared with R600::ALU_LITERAL_X (the statement guarded
//    by that test is not visible; presumably other sources are skipped);
//  - Lits is searched for an immediate operand equal to the source's value;
//  - on a hit, the source is rewritten to LiteralRegs[index of the hit];
//  - otherwise the code asserts that fewer than 4 literals are recorded,
//    rewrites the source to the next LiteralRegs entry and appends MI's
//    `literal` operand to Lits.
// The contents of LiteralRegs and the origin of Srcs are not visible in this
// listing.
329 void getLiteral(
MachineInstr &
MI, std::vector<MachineOperand *> &Lits)
const {
330 static const unsigned LiteralRegs[] = {
338 for (
const auto &Src:Srcs) {
339 if (Src.first->getReg() != R600::ALU_LITERAL_X)
341 int64_t
Imm = Src.second;
342 std::vector<MachineOperand *>::iterator It =
344 return val->isImm() && (
val->getImm() ==
Imm);
349 TII->getOperandIdx(
MI.getOpcode(), R600::OpName::literal));
351 if (It != Lits.end()) {
353 unsigned Index = It - Lits.begin();
354 Src.first->setReg(LiteralRegs[Index]);
357 assert(Lits.size() < 4 &&
"Too many literals in Instruction Group");
358 Src.first->setReg(LiteralRegs[Lits.size()]);
359 Lits.push_back(&Operand);
// Tail of a const member function whose first signature line is omitted from
// this listing (its name is not visible here).
//
// Visible behavior: consumes Literals two at a time. LiteralPair0 is element i;
// LiteralPair1 is element i + 1, or 0 when the vector has an odd length. For
// each pair a R600::LITERALS instruction is built in MBB using the debug
// location of InsertPos, and InsertPos is reassigned to the built instruction.
// The operands appended to that instruction are not visible in this listing.
366 const std::vector<unsigned> &Literals)
const {
368 for (
unsigned i = 0,
e = Literals.size();
i <
e;
i+=2) {
369 unsigned LiteralPair0 = Literals[
i];
370 unsigned LiteralPair1 = (
i + 1 <
e)?Literals[
i + 1]:0;
371 InsertPos =
BuildMI(
MBB, InsertPos->getDebugLoc(),
372 TII->get(R600::LITERALS))
// Body fragment of the ALU-clause builder (function header omitted from this
// listing; presumably MakeALUClause, called from the main loop -- confirm).
//
// Visible behavior:
//  - Trivial instructions get separate handling (not shown).
//  - An instruction that is neither a bundle nor an ALU instruction triggers a
//    guarded statement that is not shown (presumably it ends the clause).
//  - For a bundle, each bundled instruction is detached from its predecessor,
//    internal-read flags on register operands are cleared, its literals are
//    collected with getLiteral(), and it is appended to ClauseContent.
//  - A non-bundled instruction has its literals collected and is appended too.
//  - The collected literal operands are then emitted in pairs as
//    R600::LITERALS instructions, which are also appended to ClauseContent.
//    Only the immediate-operand branch is visible; the non-immediate branch
//    and any handling of an odd literal count are not shown.
//  - The clause content is asserted to hold fewer than 128 instructions.
// The result pairs the address of ClauseHead with the collected content.
383 std::vector<MachineInstr *> ClauseContent;
386 if (IsTrivialInst(*
I)) {
390 if (!
I->isBundle() && !
TII->isALUInstr(
I->getOpcode()))
392 std::vector<MachineOperand *>Literals;
396 while (++BI !=
E && BI->isBundledWithPred()) {
397 BI->unbundleFromPred();
399 if (MO.isReg() && MO.isInternalRead())
400 MO.setIsInternalRead(
false);
402 getLiteral(*BI, Literals);
403 ClauseContent.push_back(&*BI);
408 getLiteral(*
I, Literals);
409 ClauseContent.push_back(&*
I);
412 for (
unsigned i = 0,
e = Literals.size();
i <
e;
i += 2) {
414 TII->get(R600::LITERALS));
415 if (Literals[
i]->
isImm()) {
416 MILit.
addImm(Literals[
i]->getImm());
422 if (Literals[
i + 1]->
isImm()) {
423 MILit.
addImm(Literals[
i + 1]->getImm());
430 ClauseContent.push_back(MILit);
433 assert(ClauseContent.size() < 128 &&
"ALU clause is too big");
435 return ClauseFile(&ClauseHead,
std::move(ClauseContent));
// Body fragment of a clause emitter (function header omitted from this
// listing; presumably EmitFetchClause -- confirm).
//
// Visible behavior: adds the current CfCount to the address immediate of the
// clause head, splices clause instructions (MI) to InsertPos within BB, and
// advances CfCount by twice the number of instructions in the clause content.
441 CounterPropagateAddr(*
Clause.first, CfCount);
445 BB->splice(InsertPos,
BB,
MI);
446 CfCount += 2 *
Clause.second.size();
// Tail of a clause emitter whose first signature line is omitted from this
// listing (presumably EmitALUClause -- confirm). CfCount is taken by reference
// and updated in place.
//
// Visible behavior: resets operand 0 of the clause head to 0 and then adds the
// current CfCount to it, splices clause instructions (MI) to InsertPos within
// BB, and advances CfCount by the number of instructions in the clause content.
450 ClauseFile &
Clause,
unsigned &CfCount) {
451 Clause.first->getOperand(0).setImm(0);
452 CounterPropagateAddr(*
Clause.first, CfCount);
456 BB->splice(InsertPos,
BB,
MI);
457 CfCount +=
Clause.second.size();
// Body of the single-instruction address fix-up (its signature line is omitted
// from this listing): adds Addr to the immediate already stored in operand 0
// of MI.
461 MI.getOperand(0).setImm(
Addr +
MI.getOperand(0).getImm());
// Set overload: applies the single-instruction address fix-up with the same
// Addr to instructions taken from MIs. The loop header is omitted from this
// listing; presumably it visits every element of the set.
463 void CounterPropagateAddr(
const std::set<MachineInstr *> &MIs,
464 unsigned Addr)
const {
466 CounterPropagateAddr(*
MI,
Addr);
// Body fragment of the pass's main routine. The function header and many
// interior lines are omitted from this listing, so the notes below describe
// only what is visible.
//
// Setup: caches the fetch clause size limit, instruction info and register
// info from the subtarget ST. CfCount starts at 0 and is passed to the clause
// emitters and address fix-ups.
//
// Bookkeeping containers:
//  - LoopStack: per loop, a CfCount value paired with a set of instructions
//    that later receive an address fix-up.
//  - IfThenElseStack: pending jump/else instructions awaiting a fix-up.
//  - LastAlu: starts with one null entry; its back() is cleared on fetch
//    clauses and on instructions other than ENDIF, and set on CF_ALU.
//  - ToPopAfter: CF_ALU instructions collected from LastAlu.back(); near the
//    end a CF_ALU_POP_AFTER is built from operands 0-8 of an `Alu` instruction
//    (presumably each ToPopAfter entry), which is then erased.
//
// Instructions using the texture or vertex cache start a fetch clause. When
// the closing pseudo-instruction is erased, all collected fetch clauses are
// emitted first, followed by all ALU clauses.
477 MaxFetchInst =
ST->getTexVTXClauseSize();
478 TII =
ST->getInstrInfo();
479 TRI =
ST->getRegisterInfo();
487 unsigned CfCount = 0;
488 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
489 std::vector<MachineInstr * > IfThenElseStack;
492 getHWInstrDesc(CF_CALL_FS));
495 std::vector<ClauseFile> FetchClauses, AluClauses;
496 std::vector<MachineInstr *> LastAlu(1);
497 std::vector<MachineInstr *> ToPopAfter;
501 if (
TII->usesTextureCache(*
I) ||
TII->usesVertexCache(*
I)) {
503 FetchClauses.push_back(MakeFetchClause(
MBB,
I));
505 LastAlu.back() =
nullptr;
510 if (
MI->getOpcode() != R600::ENDIF)
511 LastAlu.back() =
nullptr;
512 if (
MI->getOpcode() == R600::CF_ALU)
513 LastAlu.back() = &*
MI;
515 bool RequiresWorkAround =
516 CFStack.requiresWorkAroundForInst(
MI->getOpcode());
517 switch (
MI->getOpcode()) {
// With the work-around, the instruction is downgraded to plain CF_ALU and the
// branch is accounted as a CF_PUSH_EG; the CF_ALU_PUSH_BEFORE push appears to
// be the path without the work-around (the `else` is not visible here).
518 case R600::CF_ALU_PUSH_BEFORE:
519 if (RequiresWorkAround) {
521 <<
"Applying bug work-around for ALU_PUSH_BEFORE\n");
525 MI->setDesc(
TII->get(R600::CF_ALU));
527 CFStack.pushBranch(R600::CF_PUSH_EG);
529 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
533 AluClauses.push_back(MakeALUClause(
MBB,
I));
// Loop start: builds a CF_WHILE_LOOP instruction and pairs the current CfCount
// with a set holding that instruction; the pseudo-instruction is erased.
537 case R600::WHILELOOP: {
540 getHWInstrDesc(CF_WHILE_LOOP))
542 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
543 std::set<MachineInstr *>());
544 Pair.second.insert(MIb);
546 MI->eraseFromParent();
// Loop end: pops the innermost LoopStack entry and adds the current CfCount to
// every instruction recorded for that loop.
550 case R600::ENDLOOP: {
552 std::pair<unsigned, std::set<MachineInstr *>> Pair =
554 LoopStack.pop_back();
555 CounterPropagateAddr(Pair.second, CfCount);
558 MI->eraseFromParent();
// If: opens a new LastAlu slot and records the built CF_JUMP so its address
// can be fixed up later.
562 case R600::IF_PREDICATE_SET: {
563 LastAlu.push_back(
nullptr);
565 getHWInstrDesc(CF_JUMP))
568 IfThenElseStack.push_back(MIb);
570 MI->eraseFromParent();
576 IfThenElseStack.pop_back();
577 CounterPropagateAddr(*JumpInst, CfCount);
579 getHWInstrDesc(CF_ELSE))
583 IfThenElseStack.push_back(MIb);
584 MI->eraseFromParent();
590 if (LastAlu.back()) {
591 ToPopAfter.push_back(LastAlu.back());
594 getHWInstrDesc(CF_POP))
603 IfThenElseStack.pop_back();
604 CounterPropagateAddr(*IfOrElseInst, CfCount);
607 MI->eraseFromParent();
613 getHWInstrDesc(CF_LOOP_BREAK))
615 LoopStack.back().second.insert(MIb);
616 MI->eraseFromParent();
// Continue: the built CF_LOOP_CONTINUE is recorded with the innermost loop so
// it receives that loop's address fix-up.
619 case R600::CONTINUE: {
621 getHWInstrDesc(CF_LOOP_CONTINUE))
623 LoopStack.back().second.insert(MIb);
624 MI->eraseFromParent();
636 MI->eraseFromParent();
637 for (ClauseFile &CF : FetchClauses)
638 EmitFetchClause(
I,
DL, CF, CfCount);
639 for (ClauseFile &CF : AluClauses)
640 EmitALUClause(
I,
DL, CF, CfCount);
644 if (
TII->isExport(
MI->getOpcode())) {
653 TII->get(R600::CF_ALU_POP_AFTER))
654 .
addImm(Alu->getOperand(0).getImm())
655 .
addImm(Alu->getOperand(1).getImm())
656 .
addImm(Alu->getOperand(2).getImm())
657 .
addImm(Alu->getOperand(3).getImm())
658 .
addImm(Alu->getOperand(4).getImm())
659 .
addImm(Alu->getOperand(5).getImm())
660 .
addImm(Alu->getOperand(6).getImm())
661 .
addImm(Alu->getOperand(7).getImm())
662 .
addImm(Alu->getOperand(8).getImm());
663 Alu->eraseFromParent();
// Returns the human-readable name of this pass as a string literal. The
// enclosing function header is omitted from this listing.
672 return "R600 Control Flow Finalizer Pass";
679 "R600 Control Flow Finalizer",
false,
false)
// Out-of-class definition of the pass's static ID member, initialized to 0.
// NOTE(review): presumably used as the pass's identity in the usual LLVM pass
// idiom, where the address rather than the value matters -- confirm.
683 char R600ControlFlowFinalizer::
ID = 0;
// Allocates a new R600ControlFlowFinalizer with `new` and returns the pointer.
// The enclosing factory function's signature is omitted from this listing;
// who takes ownership of the returned object is not visible here.
688 return new R600ControlFlowFinalizer();