#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <set>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "r600cf"
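
// CFStack models the R600 control-flow stack while CF instructions are
// finalized. Full entries (loops, WQM pushes) and fractional sub-entries
// (non-WQM pushes) are tracked separately so the pass can compute the maximum
// stack depth the shader needs.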
namespace {

struct CFStack {
  enum StackItem {
    ENTRY = 0,
    SUB_ENTRY = 1,
    FIRST_NON_WQM_PUSH = 2,
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
  };

  const R600Subtarget *ST;
  std::vector<StackItem> BranchStack;
  std::vector<StackItem> LoopStack;
  unsigned MaxStackSize;
  unsigned CurrentEntries = 0;
  unsigned CurrentSubEntries = 0;
  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
      // A stack entry is reserved for CALL_FS in vertex shaders.
      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}

  unsigned getLoopDepth();
  bool branchStackContains(CFStack::StackItem);
  bool requiresWorkAroundForInst(unsigned Opcode);
  unsigned getSubEntrySize(CFStack::StackItem Item);
  void updateMaxStackSize();
  void pushBranch(unsigned Opcode, bool isWQM = false);
  void pushLoop();
  void popBranch();
  void popLoop();
};
unsigned CFStack::getLoopDepth() {
  return LoopStack.size();
}
bool CFStack::branchStackContains(CFStack::StackItem Item) {
  return llvm::is_contained(BranchStack, Item);
}
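
// Whether the CF-ALU-bug work-around is needed for this opcode at the current
// stack depth. The sub-entry thresholds below are deliberately conservative,
// since the hardware's exact stack allocation is not known for certain.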
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
      getLoopDepth() > 1)
    return true;

  if (!ST->hasCFAluBug())
    return false;

  switch (Opcode) {
  default:
    return false;
  case R600::CF_ALU_PUSH_BEFORE:
  case R600::CF_ALU_ELSE_AFTER:
  case R600::CF_ALU_BREAK:
  case R600::CF_ALU_CONTINUE:
    if (CurrentSubEntries == 0)
      return false;
    if (ST->getWavefrontSize() == 64) {
      // Conservative: the work-around is only known to be required once more
      // than 3 sub-entries are in use.
      return CurrentSubEntries > 3;
    }
    assert(ST->getWavefrontSize() == 32);
    // Conservative: see the wavefront-64 case; the threshold is 7 here.
    return CurrentSubEntries > 7;
  }
}
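
// Number of sub-entries an item of the given kind occupies on the stack.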
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch (Item) {
  default:
    return 0;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST->hasCaymanISA());
    // R600/R700 need one more sub-entry for the first non-WQM push than
    // Evergreen does (the push itself plus the extra space).
    return ST->getGeneration() <= AMDGPUSubtarget::R700 ? 3 : 2;
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    // The push itself plus one extra sub-entry.
    return 2;
  case CFStack::SUB_ENTRY:
    return 1;
  }
}
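
// Four sub-entries share one physical stack entry, hence the divideCeil by 4.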
void CFStack::updateMaxStackSize() {
  unsigned CurrentStackSize =
      CurrentEntries + divideCeil(CurrentSubEntries, 4);
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}
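
// Classify a push for the branch stack: WQM pushes cost a full entry, while
// the first non-WQM push (and, past Evergreen, the first one made while full
// entries are live) needs extra sub-entries compared to a plain SUB_ENTRY.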
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
  CFStack::StackItem Item = CFStack::ENTRY;
  switch (Opcode) {
  case R600::CF_PUSH_EG:
  case R600::CF_ALU_PUSH_BEFORE:
    if (!isWQM) {
      if (!ST->hasCaymanISA() &&
          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
        Item = CFStack::FIRST_NON_WQM_PUSH;
      else if (CurrentEntries > 0 &&
               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
               !ST->hasCaymanISA() &&
               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
      else
        Item = CFStack::SUB_ENTRY;
    } else
      Item = CFStack::ENTRY;
    break;
  }
  BranchStack.push_back(Item);
  if (Item == CFStack::ENTRY)
    CurrentEntries++;
  else
    CurrentSubEntries += getSubEntrySize(Item);
  updateMaxStackSize();
}
void CFStack::pushLoop() {
  LoopStack.push_back(CFStack::ENTRY);
  CurrentEntries++;
  updateMaxStackSize();
}
void CFStack::popBranch() {
  CFStack::StackItem Top = BranchStack.back();
  if (Top == CFStack::ENTRY)
    CurrentEntries--;
  else
    CurrentSubEntries -= getSubEntrySize(Top);
  BranchStack.pop_back();
}
void CFStack::popLoop() {
  CurrentEntries--;
  LoopStack.pop_back();
}
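
// The finalizer itself: for each basic block it groups fetch and ALU
// instructions into clauses, lowers the control-flow pseudo instructions to
// the generation-specific CF opcodes, and back-patches their address operands
// once the final CF word count is known.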
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;

  enum ControlFlowInstruction {
    CF_TC, CF_VC, CF_CALL_FS, CF_WHILE_LOOP, CF_END_LOOP,
    CF_LOOP_BREAK, CF_LOOP_CONTINUE, CF_JUMP, CF_ELSE, CF_POP, CF_END
  };

  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;
  unsigned MaxFetchInst;
  const R600Subtarget *ST = nullptr;
  bool IsTrivialInst(MachineInstr &MI) const {
    switch (MI.getOpcode()) {
    case R600::KILL:
    case R600::RETURN:
      return true;
    default:
      return false;
    }
  }
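
  // Map the generic ControlFlowInstruction kind onto the R600- or
  // Evergreen-specific hardware opcode (Cayman only differs for CF_END).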
  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
      break;
    case CF_END:
      if (ST->hasCaymanISA()) {
        Opcode = R600::CF_END_CM;
        break;
      }
      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
      break;
    }
    assert(Opcode && "No opcode selected");
    return TII->get(Opcode);
  }
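
  // A fetch instruction may join the current clause only if it does not read
  // a 128-bit register written earlier in the clause; DstRegs accumulates the
  // clause's destination super-registers for that check.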
  bool isCompatibleWithClause(const MachineInstr &MI,
                              std::set<unsigned> &DstRegs) const {
    unsigned DstMI, SrcMI;
    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                          E = MI.operands_end();
         I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    }
    return false;
  }
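
  // Greedily gather consecutive texture or vertex fetch instructions (up to
  // MaxFetchInst) into one clause and emit the matching CF_TC/CF_VC header in
  // front of them.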
  ClauseFile MakeFetchClause(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &I) const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(*ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(*I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      if ((IsTex && !TII->usesTextureCache(*I)) ||
          (!IsTex && !TII->usesVertexCache(*I)))
        break;
      if (!isCompatibleWithClause(*I, DstRegs))
        break;
      AluInstCount++;
      ClauseContent.push_back(&*I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
                                getHWInstrDesc(IsTex ? CF_TC : CF_VC))
                            .addImm(0)                 // ADDR
                            .addImm(AluInstCount - 1); // COUNT
    return ClauseFile(MIb, std::move(ClauseContent));
  }
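
  // Rewrite ALU_LITERAL_X sources so each distinct literal in the instruction
  // group gets one of the four literal channels, reusing a channel when the
  // same immediate already appears in Lits.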
  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    static const unsigned LiteralRegs[] = {
      R600::ALU_LITERAL_X,
      R600::ALU_LITERAL_Y,
      R600::ALU_LITERAL_Z,
      R600::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (const auto &Src : Srcs) {
      if (Src.first->getReg() != R600::ALU_LITERAL_X)
        continue;
      int64_t Imm = Src.second;
      std::vector<MachineOperand *>::iterator It =
          llvm::find_if(Lits, [&](MachineOperand *val) {
            return val->isImm() && (val->getImm() == Imm);
          });

      // Get the corresponding literal operand of the instruction group.
      MachineOperand &Operand = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

      if (It != Lits.end()) {
        // Reuse an existing literal channel.
        unsigned Index = It - Lits.begin();
        Src.first->setReg(LiteralRegs[Index]);
      } else {
        // Allocate a new literal channel.
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Src.first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(&Operand);
      }
    }
  }
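
  // Emit R600::LITERALS pseudos holding the given 32-bit values, two per
  // instruction (the second slot is padded with 0 for an odd count).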
  MachineBasicBlock::iterator
  insertLiterals(MachineBasicBlock::iterator InsertPos,
                 const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
                          TII->get(R600::LITERALS))
                      .addImm(LiteralPair0)
                      .addImm(LiteralPair1);
    }
    return InsertPos;
  }
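
  // Collect the body of a CF_ALU clause: unbundle instruction groups, assign
  // literal channels, and append a LITERALS pseudo for every pair of literal
  // operands used by the group.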
  ClauseFile MakeALUClause(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &I) const {
    MachineInstr &ClauseHead = *I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(*I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<MachineOperand *> Literals;
      if (I->isBundle()) {
        MachineInstr &DeleteMI = *I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (MachineOperand &MO : BI->operands()) {
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(*BI, Literals);
          ClauseContent.push_back(&*BI);
        }
        I = BI;
        DeleteMI.eraseFromParent();
      } else {
        getLiteral(*I, Literals);
        ClauseContent.push_back(&*I);
        I++;
      }
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
                                            TII->get(R600::LITERALS));
        if (Literals[i]->isImm()) {
          MILit.addImm(Literals[i]->getImm());
        } else {
          MILit.addGlobalAddress(Literals[i]->getGlobal(),
                                 Literals[i]->getOffset());
        }
        if (i + 1 < e) {
          if (Literals[i + 1]->isImm()) {
            MILit.addImm(Literals[i + 1]->getImm());
          } else {
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
                                   Literals[i + 1]->getOffset());
          }
        } else
          MILit.addImm(0);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
  }
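
  // Splice the collected clause bodies to the insertion point and advance the
  // CF word counter: fetch instructions occupy two CF words each, ALU
  // instructions one.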
  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
                       const DebugLoc &DL, ClauseFile &Clause,
                       unsigned &CfCount) {
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i)
      BB->splice(InsertPos, BB, Clause.second[i]);
    CfCount += 2 * Clause.second.size();
  }

  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
                     ClauseFile &Clause, unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i)
      BB->splice(InsertPos, BB, Clause.second[i]);
    CfCount += Clause.second.size();
  }
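
  // Patch the address operand of a CF instruction (or a set of them) once the
  // target CF word index is known.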
  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
  }

  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                            unsigned Addr) const {
    for (MachineInstr *MI : MIs)
      CounterPropagateAddr(*MI, Addr);
  }
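
public:
  // Per-function driver: builds clauses, lowers CF pseudos, and records the
  // required CF stack depth for the runtime.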
  static char ID;

  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();
    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineBasicBlock &MBB : MF) {
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr *> IfThenElseStack;
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
                getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;
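
      // Walk the block, turning control-flow pseudos into hardware CF
      // instructions while CfCount tracks the number of CF words emitted.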
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          [[fallthrough]];
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
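        // Loop pseudos: record the CF word index of the WHILE_LOOP and the set
        // of CF instructions whose target address must be back-patched once
        // the matching ENDLOOP is reached.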
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 2);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
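        // If/else pseudos become CF_JUMP/CF_ELSE with placeholder targets that
        // are patched when the matching ELSE or ENDIF is processed.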
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ELSE: {
          MachineInstr *JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_ELSE))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
          } else {
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            CfCount++;
          }
          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case R600::BREAK: {
          CfCount++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_BREAK))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case R600::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
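        // RETURN terminates the CF program: emit CF_END (padded to an even
        // number of CF words) and splice in the clause bodies collected
        // earlier.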
        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          if (CfCount % 2) {
            BuildMI(MBB, I, DL, TII->get(R600::PAD));
            CfCount++;
          }
          MI->eraseFromParent();
          for (ClauseFile &CF : FetchClauses)
            EmitFetchClause(I, DL, CF, CfCount);
          for (ClauseFile &CF : AluClauses)
            EmitALUClause(I, DL, CF, CfCount);
          break;
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }

      // Replace the last CF_ALU of each if-block recorded in ToPopAfter with
      // CF_ALU_POP_AFTER, carrying all of its operands over.
      for (MachineInstr *Alu : ToPopAfter) {
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
                TII->get(R600::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->CFStackSize = CFStack.MaxStackSize;
    }

    return false;
  }
670 return "R600 Control Flow Finalizer Pass";
677 "R600 Control Flow Finalizer",
false,
false)
char R600ControlFlowFinalizer::ID = 0;

char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
FunctionPass *llvm::createR600ControlFlowFinalizer() {
  return new R600ControlFlowFinalizer();
}