63#define DEBUG_TYPE "si-lower-control-flow"
88 unsigned Andn2TermOpc;
91 unsigned OrSaveExecOpc;
94 bool EnableOptimizeEndCf =
false;
131 while (
I !=
End && !
I->isUnconditionalBranch())
137 void optimizeEndCf();
147 return "SI Lower control flow pseudo instructions";
163char SILowerControlFlow::ID = 0;
166 "SI lower control flow",
false,
false)
182 while (!Worklist.
empty()) {
197 Register SaveExecReg =
MI.getOperand(0).getReg();
198 auto U =
MRI->use_instr_nodbg_begin(SaveExecReg);
200 if (U ==
MRI->use_instr_nodbg_end() ||
201 std::next(U) !=
MRI->use_instr_nodbg_end() ||
202 U->getOpcode() != AMDGPU::SI_END_CF)
212 Register SaveExecReg =
MI.getOperand(0).getReg();
214 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
227 auto UseMI =
MRI->use_instr_nodbg_begin(SaveExecReg);
233 Register CopyReg = SimpleIf ? SaveExecReg
234 :
MRI->createVirtualRegister(BoolRC);
239 LoweredIf.
insert(CopyReg);
250 setImpSCCDefDead(*
And,
true);
258 setImpSCCDefDead(*
Xor, ImpDefSCC.
isDead());
271 I = skipToUncondBrOrEnd(
MBB,
I);
276 .
add(
MI.getOperand(2));
279 MI.eraseFromParent();
295 MI.eraseFromParent();
317 Register SaveReg =
MRI->createVirtualRegister(BoolRC);
320 .
add(
MI.getOperand(1));
344 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
351 MI.eraseFromParent();
356 MI.eraseFromParent();
374 auto Dst =
MI.getOperand(0).getReg();
380 bool SkipAnding =
false;
381 if (
MI.getOperand(1).isReg()) {
383 SkipAnding =
Def->getParent() ==
MI.getParent()
392 Register AndReg =
MRI->createVirtualRegister(BoolRC);
395 .
add(
MI.getOperand(1));
400 .
add(
MI.getOperand(2));
405 .
add(
MI.getOperand(1))
406 .
add(
MI.getOperand(2));
419 MI.eraseFromParent();
429 .
add(
MI.getOperand(0));
433 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
436 .
add(
MI.getOperand(1));
443 MI.eraseFromParent();
447SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
457 for ( ; It !=
E; ++It) {
458 if (
TII->mayReadEXEC(*
MRI, *It))
465 if (
B->succ_size() != 1)
485 bool NeedBlockSplit =
false;
489 if (
I->modifiesRegister(DataReg,
TRI)) {
490 NeedBlockSplit =
true;
495 unsigned Opcode = OrOpc;
497 if (NeedBlockSplit) {
499 if (MDT && SplitBB != &
MBB) {
514 .
add(
MI.getOperand(0));
518 if (SplitBB != &
MBB) {
528 if (
Op.isReg() &&
Op.isDef() &&
Op.getReg().isVirtual())
534 for (
unsigned i = 0, e =
MRI->getNumVirtRegs(); i != e; ++i) {
542 if (
Kill->getParent() == SplitBB && !DefInOrigBlock.
contains(Reg))
550 LoweredEndCf.
insert(NewMI);
555 MI.eraseFromParent();
564void SILowerControlFlow::findMaskOperands(
MachineInstr &
MI,
unsigned OpNo,
567 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
573 if (!Def ||
Def->getParent() !=
MI.getParent() ||
574 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
580 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
581 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
582 !(
I->isCopy() &&
I->getOperand(0).getReg() != Exec))
585 for (
const auto &
SrcOp :
Def->explicit_operands())
588 Src.push_back(
SrcOp);
596 assert(
MI.getNumExplicitOperands() == 3);
598 unsigned OpToReplace = 1;
599 findMaskOperands(
MI, 1, Ops);
600 if (Ops.
size() == 1) OpToReplace = 2;
601 findMaskOperands(
MI, 2, Ops);
602 if (Ops.
size() != 3)
return;
604 unsigned UniqueOpndIdx;
605 if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
606 else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
607 else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
611 MI.removeOperand(OpToReplace);
612 MI.addOperand(Ops[UniqueOpndIdx]);
613 if (
MRI->use_empty(Reg))
614 MRI->getUniqueVRegDef(Reg)->eraseFromParent();
617void SILowerControlFlow::optimizeEndCf() {
620 if (!EnableOptimizeEndCf)
626 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
627 if (Next ==
MBB.
end() || !LoweredEndCf.
count(&*Next))
632 =
TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
636 if (Def && LoweredIf.
count(SavedExec)) {
642 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
643 MI->eraseFromParent();
646 removeMBBifRedundant(
MBB);
658 switch (
MI.getOpcode()) {
663 case AMDGPU::SI_ELSE:
667 case AMDGPU::SI_IF_BREAK:
671 case AMDGPU::SI_LOOP:
675 case AMDGPU::SI_WATERFALL_LOOP:
676 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
679 case AMDGPU::SI_END_CF:
680 SplitBB = emitEndCf(
MI);
684 assert(
false &&
"Attempt to process unsupported instruction");
693 case AMDGPU::S_AND_B64:
694 case AMDGPU::S_OR_B64:
695 case AMDGPU::S_AND_B32:
696 case AMDGPU::S_OR_B32:
698 combineMasks(MaskMI);
713 bool IsWave32 =
ST.isWave32();
715 if (
MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
718 TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
722 MI.eraseFromParent();
733 Register InputReg =
MI.getOperand(0).getReg();
739 if (DefInstr != FirstMI) {
757 Register CountReg =
MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
758 auto BfeMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_BFE_U32), CountReg)
760 .
addImm((
MI.getOperand(1).getImm() & Mask) | 0x70000);
765 TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
768 auto CmpMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_CMP_EQ_U32))
775 TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
780 MI.eraseFromParent();
785 MI.eraseFromParent();
799 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
810 if (
P->getFallThrough() == &
MBB)
812 P->ReplaceUsesOfBlockWith(&
MBB, Succ);
834 MF->
splice(std::next(FallThroughPos), Succ);
846 TII =
ST.getInstrInfo();
847 TRI = &
TII->getRegisterInfo();
848 EnableOptimizeEndCf =
852 LIS = getAnalysisIfAvailable<LiveIntervals>();
854 LV = getAnalysisIfAvailable<LiveVariables>();
855 MDT = getAnalysisIfAvailable<MachineDominatorTree>();
857 BoolRC =
TRI->getBoolRC();
860 AndOpc = AMDGPU::S_AND_B32;
861 OrOpc = AMDGPU::S_OR_B32;
862 XorOpc = AMDGPU::S_XOR_B32;
863 MovTermOpc = AMDGPU::S_MOV_B32_term;
864 Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
865 XorTermrOpc = AMDGPU::S_XOR_B32_term;
866 OrTermrOpc = AMDGPU::S_OR_B32_term;
867 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
868 Exec = AMDGPU::EXEC_LO;
870 AndOpc = AMDGPU::S_AND_B64;
871 OrOpc = AMDGPU::S_OR_B64;
872 XorOpc = AMDGPU::S_XOR_B64;
873 MovTermOpc = AMDGPU::S_MOV_B64_term;
874 Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
875 XorTermrOpc = AMDGPU::S_XOR_B64_term;
876 OrTermrOpc = AMDGPU::S_OR_B64_term;
877 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
882 const bool CanDemote =
884 for (
auto &
MBB : MF) {
885 bool IsKillBlock =
false;
887 if (
TII->isKillTerminator(
Term.getOpcode())) {
893 if (CanDemote && !IsKillBlock) {
894 for (
auto &
MI :
MBB) {
895 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
903 bool Changed =
false;
906 BI != MF.end(); BI = NextBB) {
907 NextBB = std::next(BI);
917 switch (
MI.getOpcode()) {
919 case AMDGPU::SI_ELSE:
920 case AMDGPU::SI_IF_BREAK:
921 case AMDGPU::SI_WATERFALL_LOOP:
922 case AMDGPU::SI_LOOP:
923 case AMDGPU::SI_END_CF:
924 SplitMBB = process(
MI);
929 case AMDGPU::SI_INIT_EXEC:
930 case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
931 lowerInitExec(
MBB,
MI);
941 if (SplitMBB !=
MBB) {
942 MBB = Next->getParent();
950 LoweredEndCf.
clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
This file defines the SmallSet class.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void eraseNode(MachineBasicBlock *BB)
eraseNode - Removes a node from the dominator tree.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void splice(iterator InsertPt, iterator MBBI)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineInstr * removeFromParent()
Unlink 'this' from the containing basic block, and return it without deleting it.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
void setIsDead(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static bool isVALU(const MachineInstr &MI)
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void clear()
Completely clear the SetVector.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
constexpr char WavefrontSize[]
Key for Kernel::CodeProps::Metadata::mWavefrontSize.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
char & SILowerControlFlowID
VarInfo - This represents the regions where a virtual register is live in the program.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstructions which are the last use of this virtual register (kill it) in the...