24 #define DEBUG_TYPE "si-insert-skips" 27 "amdgpu-skip-threshold-legacy",
28 cl::desc(
"Number of instructions before jumping over divergent control flow"),
41 bool EarlyExitClearsExec =
false;
64 return "SI insert s_cbranch_execz instructions";
79 "SI insert s_cbranch_execz instructions",
false,
false)
87 if (
MI.isMetaInstruction())
91 switch (
MI.getOpcode()) {
92 case AMDGPU::SI_MASK_BRANCH:
101 unsigned NumInstr = 0;
119 if (
I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
120 I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
123 if (
TII->hasUnwantedEffectsWhenEXECEmpty(*
I))
127 if (
TII->isSMRD(*
I) ||
TII->isVMEM(*
I) ||
TII->isFLAT(*
I) ||
128 I->getOpcode() == AMDGPU::S_WAITCNT)
143 if (!MDT->dominates(&
MBB, Other))
173 if (!EarlyExitBlock) {
179 EarlyExitClearsExec =
false;
182 if (ClearExec && !EarlyExitClearsExec) {
184 unsigned Mov =
ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
185 Register Exec =
ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
186 auto ExitI = EarlyExitBlock->getFirstNonPHI();
187 BuildMI(*EarlyExitBlock, ExitI,
DL,
TII->get(Mov), Exec).addImm(0);
188 EarlyExitClearsExec =
true;
200 DTUpdates.
push_back({DomTreeT::Insert, SplitBB, Succ});
234 ensureEarlyExitBlock(
MBB,
false);
238 .addMBB(EarlyExitBlock);
242 if (Next !=
MBB.
end() && !Next->isTerminator())
246 MDT->getBase().insertEdge(&
MBB, EarlyExitBlock);
256 switch (
MI.getOpcode()) {
257 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
262 switch (
MI.getOperand(2).getImm()) {
265 Opcode = AMDGPU::V_CMPX_EQ_F32_e64;
269 Opcode = AMDGPU::V_CMPX_LT_F32_e64;
273 Opcode = AMDGPU::V_CMPX_LE_F32_e64;
277 Opcode = AMDGPU::V_CMPX_GT_F32_e64;
281 Opcode = AMDGPU::V_CMPX_GE_F32_e64;
285 Opcode = AMDGPU::V_CMPX_LG_F32_e64;
288 Opcode = AMDGPU::V_CMPX_O_F32_e64;
291 Opcode = AMDGPU::V_CMPX_U_F32_e64;
294 Opcode = AMDGPU::V_CMPX_NLG_F32_e64;
297 Opcode = AMDGPU::V_CMPX_NGE_F32_e64;
300 Opcode = AMDGPU::V_CMPX_NGT_F32_e64;
303 Opcode = AMDGPU::V_CMPX_NLE_F32_e64;
306 Opcode = AMDGPU::V_CMPX_NLT_F32_e64;
309 Opcode = AMDGPU::V_CMPX_NEQ_F32_e64;
316 if (
ST.hasNoSdstCMPX())
322 MI.getOperand(0).getReg())) {
325 .
add(
MI.getOperand(1))
326 .
add(
MI.getOperand(0));
329 if (!
ST.hasNoSdstCMPX())
333 .add(
MI.getOperand(1))
335 .add(
MI.getOperand(0));
341 case AMDGPU::SI_KILL_I1_TERMINATOR: {
344 unsigned Exec =
ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
346 int64_t KillVal =
MI.getOperand(1).getImm();
347 assert(KillVal == 0 || KillVal == -1);
351 int64_t Imm =
Op.getImm();
352 assert(Imm == 0 || Imm == -1);
354 if (Imm == KillVal) {
356 : AMDGPU::S_MOV_B64), Exec)
363 unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
365 Opcode = KillVal ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_AND_B32;
380 ensureEarlyExitBlock(
MBB,
true);
383 .addMBB(EarlyExitBlock);
384 auto Next = std::next(
MI.getIterator());
386 if (Next !=
MBB.
end() && !Next->isTerminator())
390 MDT->getBase().insertEdge(&
MBB, EarlyExitBlock);
398 if (!shouldSkip(**SrcMBB.
succ_begin(), *DestBB))
404 BuildMI(SrcMBB, InsPt,
DL,
TII->get(AMDGPU::S_CBRANCH_EXECZ))
412 TII =
ST.getInstrInfo();
413 TRI = &
TII->getRegisterInfo();
414 MDT = &getAnalysis<MachineDominatorTree>();
419 bool MadeChange =
false;
427 switch (
MI.getOpcode()) {
428 case AMDGPU::SI_MASK_BRANCH:
429 MadeChange |= skipMaskBranch(
MI,
MBB);
432 case AMDGPU::S_BRANCH:
437 MI.eraseFromParent();
442 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
443 case AMDGPU::SI_KILL_I1_TERMINATOR: {
445 bool CanKill = kill(
MI);
458 dominatesAllReachable(
MBB)) {
463 MI.eraseFromParent();
468 case AMDGPU::SI_KILL_CLEANUP:
470 dominatesAllReachable(
MBB)) {
473 MI.eraseFromParent();
477 case AMDGPU::SI_EARLY_TERMINATE_SCC0:
491 Instr->eraseFromParent();
494 skipIfDead(*
Kill->getParent(), std::next(
Kill->getIterator()),
495 Kill->getDebugLoc());
496 Kill->eraseFromParent();
499 EarlyTermInstrs.clear();
500 EarlyExitBlock =
nullptr;
const MachineInstrBuilder & add(const MachineOperand &MO) const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
void push_back(const T &Elt)
char & SIInsertSkipsPassID
unsigned const TargetRegisterInfo * TRI
The last use of a register.
iterator_range< succ_iterator > successors()
Function & getFunction()
Return the LLVM function that this machine code represents.
static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, MachineDominatorTree *MDT)
Value of the register doesn't matter.
AnalysisUsage & addRequired()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
static cl::opt< unsigned > SkipThresholdFlag("amdgpu-skip-threshold-legacy", cl::desc("Number of instructions before jumping over divergent control flow"), cl::init(12), cl::Hidden)
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Core dominator tree base class.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Represent the analysis usage information of a pass.
static unsigned SkipThreshold
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
INITIALIZE_PASS_BEGIN(SIInsertSkips, DEBUG_TYPE, "SI insert s_cbranch_execz instructions", false, false) INITIALIZE_PASS_END(SIInsertSkips
self_iterator getIterator()
succ_iterator succ_begin()
Calling convention used for Mesa/AMDPAL geometry shaders.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
BlockVerifier::State From
LLVM_READONLY int getVOPe32(uint16_t Opcode)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Calling convention used for Mesa/AMDPAL pixel shaders.
LLVM_READONLY int getVCMPXNoSDstOp(uint16_t Opcode)
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool opcodeEmitsNoInsts(const MachineInstr &MI)
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
StringRef - Represent a constant reference to a string, i.e.
inst_range instructions(Function *F)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
static void generateEndPgm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, const SIInstrInfo *TII, bool IsPS)
MachineBasicBlock MachineBasicBlock::iterator MBBI
Wrapper class representing virtual and physical registers.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.