69 static const unsigned SkipThreshold = 12;
92 void computeIndirectRegAndOffset(
unsigned VecReg,
unsigned &
Reg,
int &Offset);
102 const char *getPassName()
const override {
103 return "SI Lower control flow instructions";
113 return new SILowerControlFlowPass(tm);
119 unsigned NumInstr = 0;
122 MBB = *MBB->succ_begin()) {
125 NumInstr < SkipThreshold &&
I != E; ++
I) {
127 if (
I->isBundle() || !
I->isBundled())
128 if (++NumInstr >= SkipThreshold)
161 BuildMI(MBB, Insert, DL,
TII->get(AMDGPU::S_CBRANCH_EXECNZ))
166 BuildMI(MBB, Insert, DL,
TII->get(AMDGPU::EXP))
178 BuildMI(MBB, Insert, DL,
TII->get(AMDGPU::S_ENDPGM));
187 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
Reg)
191 .addReg(AMDGPU::EXEC)
206 TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
209 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
210 .addReg(AMDGPU::EXEC)
225 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_OR_B64), Dst)
226 .addReg(AMDGPU::EXEC)
232 void SILowerControlFlowPass::IfBreak(
MachineInstr &MI) {
240 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_OR_B64), Dst)
247 void SILowerControlFlowPass::ElseBreak(
MachineInstr &MI) {
255 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_OR_B64), Dst)
267 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
268 .addReg(AMDGPU::EXEC)
271 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_CBRANCH_EXECNZ))
273 .addReg(AMDGPU::EXEC);
284 TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
285 .addReg(AMDGPU::EXEC)
314 if (Op.
getImm() & 0x80000000) {
315 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
319 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
336 if (AMDGPU::SReg_32RegClass.
contains(Idx)) {
338 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
342 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
348 assert(AMDGPU::SReg_64RegClass.
contains(Save));
349 assert(AMDGPU::VGPR_32RegClass.
contains(Idx));
352 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_MOV_B64), Save)
353 .addReg(AMDGPU::EXEC);
356 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32),
361 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
362 .addReg(AMDGPU::VCC_LO);
365 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
370 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
371 .addReg(AMDGPU::VCC);
374 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
382 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
383 .addReg(AMDGPU::EXEC)
387 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_CBRANCH_EXECNZ))
392 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
406 void SILowerControlFlowPass::computeIndirectRegAndOffset(
unsigned VecReg,
409 unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
414 int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
426 void SILowerControlFlowPass::IndirectSrc(
MachineInstr &MI) {
436 computeIndirectRegAndOffset(Vec, Reg, Off);
444 LoadM0(MI, MovRel, Off);
447 void SILowerControlFlowPass::IndirectDst(
MachineInstr &MI) {
457 computeIndirectRegAndOffset(Dst, Reg, Off);
466 LoadM0(MI, MovRel, Off);
469 bool SILowerControlFlowPass::runOnMachineFunction(
MachineFunction &MF) {
475 bool HaveKill =
false;
476 bool NeedWQM =
false;
477 bool NeedFlat =
false;
485 for (I = MBB.
begin(); I != MBB.
end(); I = Next) {
503 case AMDGPU::SI_ELSE:
507 case AMDGPU::SI_BREAK:
511 case AMDGPU::SI_IF_BREAK:
515 case AMDGPU::SI_ELSE_BREAK:
519 case AMDGPU::SI_LOOP:
524 case AMDGPU::SI_END_CF:
525 if (--Depth == 0 && HaveKill) {
532 case AMDGPU::SI_KILL:
540 case AMDGPU::S_BRANCH:
544 case AMDGPU::SI_INDIRECT_SRC:
548 case AMDGPU::SI_INDIRECT_DST_V1:
549 case AMDGPU::SI_INDIRECT_DST_V2:
550 case AMDGPU::SI_INDIRECT_DST_V4:
551 case AMDGPU::SI_INDIRECT_DST_V8:
552 case AMDGPU::SI_INDIRECT_DST_V16:
562 AMDGPU::EXEC).addReg(AMDGPU::EXEC);
580 uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256);
582 assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff &&
583 "Stack limits should be smaller than 16-bits");
596 BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
600 BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI)
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
AMDGPU specific subclass of TargetSubtarget.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
MachineBasicBlock * getMBB() const
Describe properties that are true of each instruction in the target description file.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
NodeTy * getNextNode()
Get the next node, or 0 for the list tail.
unsigned getShaderType() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
Number of individual test runs; apply this number of consecutive mutations to each input.
const MachineBasicBlock & front() const
FunctionPass * createSILowerControlFlowPass(TargetMachine &tm)
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bundle_iterator< MachineInstr, instr_iterator > iterator
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
const MachineOperand & getOperand(unsigned i) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
succ_iterator succ_begin()
MachineOperand class - Representation of each machine instruction operand.
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Representation of each machine instruction.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Interface definition for SIInstrInfo.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isInt< 16 >(int64_t x)
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
BasicBlockListType::iterator iterator
Primary interface to the complete machine description for the target machine.
const MachineBasicBlock & back() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...