#define DEBUG_TYPE "r600cf"
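// R600 Control Flow Finalizer: turns the control-flow pseudo instructions
// (IF_PREDICATE_SET, ELSE, ENDIF, WHILELOOP, ENDLOOP, BREAK, CONTINUE, ...)
// into native CF instructions, groups fetch and ALU instructions into
// clauses, and computes the maximum control-flow stack depth the shader
// needs.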
// Models the hardware control-flow stack so the pass can compute the maximum
// stack depth the shader needs.
struct CFStack {
  enum StackItem {
    ENTRY = 0,
    SUB_ENTRY = 1,
    FIRST_NON_WQM_PUSH = 2,
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
  };

  const R600Subtarget *ST;
  std::vector<StackItem> BranchStack;
  std::vector<StackItem> LoopStack;
  unsigned MaxStackSize;
  unsigned CurrentEntries = 0;
  unsigned CurrentSubEntries = 0;

  CFStack(const R600Subtarget *st, CallingConv::ID cc)
      : ST(st),
        // Vertex shaders need a stack entry reserved for the CALL_FS.
        MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}

  unsigned getLoopDepth();
  bool branchStackContains(CFStack::StackItem);
  bool requiresWorkAroundForInst(unsigned Opcode);
  unsigned getSubEntrySize(CFStack::StackItem Item);
  void updateMaxStackSize();
  void pushBranch(unsigned Opcode, bool isWQM = false);
  void pushLoop();
  void popBranch();
  void popLoop();
};
unsigned CFStack::getLoopDepth() {
  return LoopStack.size();
}

bool CFStack::branchStackContains(CFStack::StackItem Item) {
  return llvm::is_contained(BranchStack, Item);
}
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
      getLoopDepth() > 1)
    return true;

  if (!ST->hasCFAluBug())
    return false;

  switch (Opcode) {
  default:
    return false;
  case R600::CF_ALU_PUSH_BEFORE:
  case R600::CF_ALU_ELSE_AFTER:
  case R600::CF_ALU_BREAK:
  case R600::CF_ALU_CONTINUE:
    if (CurrentSubEntries == 0)
      return false;

    if (ST->getWavefrontSize() == 64) {
      // Be conservative: only require the work-around once the sub-entry
      // count could spill into a new stack entry.
      return CurrentSubEntries > 3;
    }
    assert(ST->getWavefrontSize() == 32);
    return CurrentSubEntries > 7;
  }
}
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch (Item) {
  default:
    return 0;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST->hasCaymanISA());
    // ... generation-dependent size ...
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    // ...
  case CFStack::SUB_ENTRY:
    return 1;
  }
}
void CFStack::updateMaxStackSize() {
  unsigned CurrentStackSize =
      CurrentEntries + divideCeil(CurrentSubEntries, 4);
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}
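// Illustrative numbers (not from the source): with CurrentEntries = 2 and
// CurrentSubEntries = 5, the in-flight stack needs 2 + divideCeil(5, 4) =
// 2 + 2 = 4 entries; MaxStackSize keeps the largest value seen so far.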
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
  CFStack::StackItem Item = CFStack::ENTRY;
  switch (Opcode) {
  case R600::CF_PUSH_EG:
  case R600::CF_ALU_PUSH_BEFORE:
    if (!isWQM) {
      if (!ST->hasCaymanISA() &&
          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
        Item = CFStack::FIRST_NON_WQM_PUSH;
      else if (CurrentEntries > 0 &&
               // The full source additionally checks the subtarget
               // generation here.
               !ST->hasCaymanISA() &&
               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
      else
        Item = CFStack::SUB_ENTRY;
    } else
      Item = CFStack::ENTRY;
    break;
  }
  BranchStack.push_back(Item);
  if (Item == CFStack::ENTRY)
    CurrentEntries++;
  else
    CurrentSubEntries += getSubEntrySize(Item);
  updateMaxStackSize();
}
void CFStack::pushLoop() {
  LoopStack.push_back(CFStack::ENTRY);
  CurrentEntries++;
  updateMaxStackSize();
}
void CFStack::popBranch() {
  CFStack::StackItem Top = BranchStack.back();
  if (Top == CFStack::ENTRY)
    CurrentEntries--;
  else
    CurrentSubEntries -= getSubEntrySize(Top);
  BranchStack.pop_back();
}
void CFStack::popLoop() {
  CurrentEntries--;
  LoopStack.pop_back();
}
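// The finalizer itself. A ClauseFile pairs a clause-head CF instruction with
// the instructions that form the body of that clause.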
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;

  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;
  unsigned MaxFetchInst;
  const R600Subtarget *ST = nullptr;
  bool IsTrivialInst(MachineInstr &MI) const {
    switch (MI.getOpcode()) {
    case R600::KILL:
    case R600::RETURN:
      return true;
    default:
      return false;
    }
  }
  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    // Evergreen and later parts use the *_EG encodings; older parts use the
    // original *_R600 ones.
    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
      break;
    case CF_END:
      if (ST->hasCaymanISA()) {
        Opcode = R600::CF_END_CM;
        break;
      }
      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
      break;
    }
    assert(Opcode && "No opcode selected");
    return TII->get(Opcode);
  }
  bool isCompatibleWithClause(const MachineInstr &MI,
                              std::set<unsigned> &DstRegs) const {
    unsigned DstMI, SrcMI;
    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                          E = MI.operands_end();
         I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          // Map the 32-bit register back onto its 128-bit super-register.
          DstMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    }
    return false;
  }
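  // MakeFetchClause groups consecutive texture- or vertex-cache instructions
  // (up to MaxFetchInst of them, with non-conflicting 128-bit destination
  // registers) behind a single CF_TC/CF_VC clause head.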
  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(*ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(*I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      if ((IsTex && !TII->usesTextureCache(*I)) ||
          (!IsTex && !TII->usesVertexCache(*I)))
        break;
      if (!isCompatibleWithClause(*I, DstRegs))
        break;
      AluInstCount++;
      ClauseContent.push_back(&*I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
                                getHWInstrDesc(IsTex ? CF_TC : CF_VC))
                            .addImm(0) // ADDR, patched later.
                            .addImm(AluInstCount - 1);
    return ClauseFile(MIb, std::move(ClauseContent));
  }
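  // getLiteral rewrites ALU_LITERAL_X source operands to one of the four
  // literal channels, reusing a channel when the same immediate already
  // appears in the instruction group (at most four literals per group).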
  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    static const unsigned LiteralRegs[] = {
      R600::ALU_LITERAL_X,
      R600::ALU_LITERAL_Y,
      R600::ALU_LITERAL_Z,
      R600::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (const auto &Src : Srcs) {
      if (Src.first->getReg() != R600::ALU_LITERAL_X)
        continue;
      int64_t Imm = Src.second;
      std::vector<MachineOperand *>::iterator It =
          llvm::find_if(Lits, [&](MachineOperand *val) {
            return val->isImm() && (val->getImm() == Imm);
          });

      // Get the corresponding literal operand of MI.
      MachineOperand &Operand = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

      if (It != Lits.end()) {
        // Reuse an existing literal register.
        unsigned Index = It - Lits.begin();
        Src.first->setReg(LiteralRegs[Index]);
      } else {
        // Allocate a new literal register.
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Src.first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(&Operand);
      }
    }
  }
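  // insertLiterals packs the collected literal values two per R600::LITERALS
  // instruction.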
  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
                          TII->get(R600::LITERALS))
                      .addImm(LiteralPair0)
                      .addImm(LiteralPair1);
    }
    return InsertPos;
  }
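  // MakeALUClause collects the ALU instructions that follow a clause head,
  // unbundling instruction groups and materializing their literals as
  // R600::LITERALS instructions.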
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineInstr &ClauseHead = *I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(*I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<MachineOperand *> Literals;
      if (I->isBundle()) {
        MachineInstr &DeleteMI = *I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (MachineOperand &MO : BI->operands()) {
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(*BI, Literals);
          ClauseContent.push_back(&*BI);
        }
        I = BI;
        DeleteMI.eraseFromParent();
      } else {
        getLiteral(*I, Literals);
        ClauseContent.push_back(&*I);
        I++;
      }
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
                                            TII->get(R600::LITERALS));
        if (Literals[i]->isImm()) {
          MILit.addImm(Literals[i]->getImm());
        } else {
          MILit.addGlobalAddress(Literals[i]->getGlobal(),
                                 Literals[i]->getOffset());
        }
        if (i + 1 < e) {
          if (Literals[i + 1]->isImm()) {
            MILit.addImm(Literals[i + 1]->getImm());
          } else {
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
                                   Literals[i + 1]->getOffset());
          }
        } else
          MILit.addImm(0);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    // Store the clause size in the clause head (operand index assumed).
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
  }
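  // EmitFetchClause/EmitALUClause patch the clause-head address via
  // CounterPropagateAddr, splice the clause body into place, and advance
  // CfCount (fetch instructions occupy two CF-program words each).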
  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
                       const DebugLoc &DL, ClauseFile &Clause,
                       unsigned &CfCount) {
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    for (MachineInstr *MI : Clause.second)
      BB->splice(InsertPos, BB, MI);
    CfCount += 2 * Clause.second.size();
  }

  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
                     ClauseFile &Clause, unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    for (MachineInstr *MI : Clause.second)
      BB->splice(InsertPos, BB, MI);
    CfCount += Clause.second.size();
  }
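  // CounterPropagateAddr adds the final CF address to the (relative) address
  // operand of a CF instruction, or of a whole set of them.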
  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
  }

  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                            unsigned Addr) const {
    for (MachineInstr *MI : MIs) {
      CounterPropagateAddr(*MI, Addr);
    }
  }
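  // The driver: walk every basic block, turn fetch/ALU sequences into
  // clauses, lower the control-flow pseudos while tracking the CF stack, and
  // finally emit the collected clauses and record the stack size.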
  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();
    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineBasicBlock &MBB : MF) {
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr *> IfThenElseStack;
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        // Vertex shaders begin with a CALL_FS to the fetch shader.
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
                getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            // Split the instruction: emit a separate push and turn the
            // ALU_PUSH_BEFORE itself into a plain CF_ALU.
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          [[fallthrough]];
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_WHILE_LOOP))
                                  .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 2);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_JUMP))
                                  .addImm(0)
                                  .addImm(0);
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ELSE: {
          MachineInstr *JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_ELSE))
                                  .addImm(0)
                                  .addImm(0);
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            // Defer the pop: fold it into the last CF_ALU (see ToPopAfter).
            ToPopAfter.push_back(LastAlu.back());
          } else {
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            CfCount++;
          }
          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case R600::BREAK: {
          CfCount++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_BREAK))
                                  .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case R600::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_CONTINUE))
                                  .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          MI->eraseFromParent();
          for (ClauseFile &CF : FetchClauses)
            EmitFetchClause(I, DL, CF, CfCount);
          for (ClauseFile &CF : AluClauses)
            EmitALUClause(I, DL, CF, CfCount);
          break;
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            CfCount++;
          }
          break;
        }
      }
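      // Deferred pops: fold each pending pop into the last CF_ALU of its
      // nesting level by rewriting that instruction as CF_ALU_POP_AFTER,
      // copying over its nine operands.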
      for (MachineInstr *Alu : ToPopAfter) {
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
                TII->get(R600::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }

      // Record the maximum control-flow stack depth for this function.
      MFI->CFStackSize = CFStack.MaxStackSize;
    }

    return false;
  }
public:
  static char ID;

  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "R600 Control Flow Finalizer Pass";
  }
};
INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
                      "R600 Control Flow Finalizer", false, false)
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
                    "R600 Control Flow Finalizer", false, false)

char R600ControlFlowFinalizer::ID = 0;

char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;

FunctionPass *llvm::createR600ControlFlowFinalizer() {
  return new R600ControlFlowFinalizer();
}