63#define DEBUG_TYPE "si-lower-control-flow"
88 unsigned Andn2TermOpc;
91 unsigned OrSaveExecOpc;
94 bool EnableOptimizeEndCf =
false;
131 while (
I != End && !
I->isUnconditionalBranch())
137 void optimizeEndCf();
147 return "SI Lower control flow pseudo instructions";
163char SILowerControlFlow::ID = 0;
166 "SI lower control flow",
false,
false)
182 while (!Worklist.
empty()) {
197 Register SaveExecReg =
MI.getOperand(0).getReg();
198 auto U =
MRI->use_instr_nodbg_begin(SaveExecReg);
200 if (U ==
MRI->use_instr_nodbg_end() ||
201 std::next(U) !=
MRI->use_instr_nodbg_end() ||
202 U->getOpcode() != AMDGPU::SI_END_CF)
212 Register SaveExecReg =
MI.getOperand(0).getReg();
214 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
227 auto UseMI =
MRI->use_instr_nodbg_begin(SaveExecReg);
233 Register CopyReg = SimpleIf ? SaveExecReg
234 :
MRI->createVirtualRegister(BoolRC);
239 LoweredIf.
insert(CopyReg);
250 setImpSCCDefDead(*
And,
true);
258 setImpSCCDefDead(*
Xor, ImpDefSCC.
isDead());
271 I = skipToUncondBrOrEnd(
MBB,
I);
276 .
add(
MI.getOperand(2));
279 MI.eraseFromParent();
295 MI.eraseFromParent();
317 Register SaveReg =
MRI->createVirtualRegister(BoolRC);
320 .
add(
MI.getOperand(1));
344 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
351 MI.eraseFromParent();
356 MI.eraseFromParent();
374 auto Dst =
MI.getOperand(0).getReg();
380 bool SkipAnding =
false;
381 if (
MI.getOperand(1).isReg()) {
383 SkipAnding =
Def->getParent() ==
MI.getParent()
392 Register AndReg =
MRI->createVirtualRegister(BoolRC);
395 .
add(
MI.getOperand(1));
400 .
add(
MI.getOperand(2));
405 .
add(
MI.getOperand(1))
406 .
add(
MI.getOperand(2));
419 MI.eraseFromParent();
429 .
add(
MI.getOperand(0));
431 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
434 .
add(
MI.getOperand(1));
441 MI.eraseFromParent();
445SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
455 for ( ; It !=
E; ++It) {
456 if (
TII->mayReadEXEC(*
MRI, *It))
463 if (
B->succ_size() != 1)
483 bool NeedBlockSplit =
false;
487 if (
I->modifiesRegister(DataReg,
TRI)) {
488 NeedBlockSplit =
true;
493 unsigned Opcode = OrOpc;
495 if (NeedBlockSplit) {
497 if (MDT && SplitBB != &
MBB) {
512 .
add(
MI.getOperand(0));
516 if (SplitBB != &
MBB) {
522 if (
Op.isReg() &&
Op.isDef() &&
Op.getReg().isVirtual())
527 for (
unsigned i = 0, e =
MRI->getNumVirtRegs(); i != e; ++i) {
532 VI.AliveBlocks.set(SplitBB->getNumber());
535 if (
Kill->getParent() == SplitBB && !SplitDefs.
contains(Reg))
543 LoweredEndCf.
insert(NewMI);
548 MI.eraseFromParent();
557void SILowerControlFlow::findMaskOperands(
MachineInstr &
MI,
unsigned OpNo,
560 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
566 if (!Def ||
Def->getParent() !=
MI.getParent() ||
567 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
573 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
574 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
575 !(
I->isCopy() &&
I->getOperand(0).getReg() != Exec))
578 for (
const auto &
SrcOp :
Def->explicit_operands())
581 Src.push_back(
SrcOp);
589 assert(
MI.getNumExplicitOperands() == 3);
591 unsigned OpToReplace = 1;
592 findMaskOperands(
MI, 1, Ops);
593 if (Ops.
size() == 1) OpToReplace = 2;
594 findMaskOperands(
MI, 2, Ops);
595 if (Ops.
size() != 3)
return;
597 unsigned UniqueOpndIdx;
598 if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
599 else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
600 else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
604 MI.removeOperand(OpToReplace);
605 MI.addOperand(Ops[UniqueOpndIdx]);
606 if (
MRI->use_empty(Reg))
607 MRI->getUniqueVRegDef(Reg)->eraseFromParent();
610void SILowerControlFlow::optimizeEndCf() {
613 if (!EnableOptimizeEndCf)
619 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
620 if (Next ==
MBB.
end() || !LoweredEndCf.
count(&*Next))
625 =
TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
629 if (Def && LoweredIf.
count(SavedExec)) {
635 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
636 MI->eraseFromParent();
639 removeMBBifRedundant(
MBB);
651 switch (
MI.getOpcode()) {
656 case AMDGPU::SI_ELSE:
660 case AMDGPU::SI_IF_BREAK:
664 case AMDGPU::SI_LOOP:
668 case AMDGPU::SI_WATERFALL_LOOP:
669 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
672 case AMDGPU::SI_END_CF:
673 SplitBB = emitEndCf(
MI);
677 assert(
false &&
"Attempt to process unsupported instruction");
686 case AMDGPU::S_AND_B64:
687 case AMDGPU::S_OR_B64:
688 case AMDGPU::S_AND_B32:
689 case AMDGPU::S_OR_B32:
691 combineMasks(MaskMI);
706 bool IsWave32 =
ST.isWave32();
708 if (
MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
711 TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
715 MI.eraseFromParent();
726 Register InputReg =
MI.getOperand(0).getReg();
732 if (DefInstr != FirstMI) {
750 Register CountReg =
MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
751 auto BfeMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_BFE_U32), CountReg)
753 .
addImm((
MI.getOperand(1).getImm() & Mask) | 0x70000);
758 TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
761 auto CmpMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_CMP_EQ_U32))
768 TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
773 MI.eraseFromParent();
778 MI.eraseFromParent();
792 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
803 if (
P->getFallThrough() == &
MBB)
805 P->ReplaceUsesOfBlockWith(&
MBB, Succ);
827 MF->
splice(std::next(FallThroughPos), Succ);
839 TII =
ST.getInstrInfo();
840 TRI = &
TII->getRegisterInfo();
841 EnableOptimizeEndCf =
845 LIS = getAnalysisIfAvailable<LiveIntervals>();
847 LV = getAnalysisIfAvailable<LiveVariables>();
848 MDT = getAnalysisIfAvailable<MachineDominatorTree>();
850 BoolRC =
TRI->getBoolRC();
853 AndOpc = AMDGPU::S_AND_B32;
854 OrOpc = AMDGPU::S_OR_B32;
855 XorOpc = AMDGPU::S_XOR_B32;
856 MovTermOpc = AMDGPU::S_MOV_B32_term;
857 Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
858 XorTermrOpc = AMDGPU::S_XOR_B32_term;
859 OrTermrOpc = AMDGPU::S_OR_B32_term;
860 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
861 Exec = AMDGPU::EXEC_LO;
863 AndOpc = AMDGPU::S_AND_B64;
864 OrOpc = AMDGPU::S_OR_B64;
865 XorOpc = AMDGPU::S_XOR_B64;
866 MovTermOpc = AMDGPU::S_MOV_B64_term;
867 Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
868 XorTermrOpc = AMDGPU::S_XOR_B64_term;
869 OrTermrOpc = AMDGPU::S_OR_B64_term;
870 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
875 const bool CanDemote =
877 for (
auto &
MBB : MF) {
878 bool IsKillBlock =
false;
880 if (
TII->isKillTerminator(
Term.getOpcode())) {
886 if (CanDemote && !IsKillBlock) {
887 for (
auto &
MI :
MBB) {
888 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
896 bool Changed =
false;
899 BI != MF.end(); BI = NextBB) {
900 NextBB = std::next(BI);
910 switch (
MI.getOpcode()) {
912 case AMDGPU::SI_ELSE:
913 case AMDGPU::SI_IF_BREAK:
914 case AMDGPU::SI_WATERFALL_LOOP:
915 case AMDGPU::SI_LOOP:
916 case AMDGPU::SI_END_CF:
917 SplitMBB = process(
MI);
922 case AMDGPU::SI_INIT_EXEC:
923 case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
924 lowerInitExec(
MBB,
MI);
934 if (SplitMBB !=
MBB) {
935 MBB = Next->getParent();
943 LoweredEndCf.
clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
This file defines the SmallSet class.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void eraseNode(MachineBasicBlock *BB)
eraseNode - Removes a node from the dominator tree.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void splice(iterator InsertPt, iterator MBBI)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineInstr * removeFromParent()
Unlink 'this' from the containing basic block, and return it without deleting it.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
void setIsDead(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static bool isVALU(const MachineInstr &MI)
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void clear()
Completely clear the SetVector.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
constexpr char WavefrontSize[]
Key for Kernel::CodeProps::Metadata::mWavefrontSize.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
char & SILowerControlFlowID
VarInfo - This represents the regions where a virtual register is live in the program.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...