#define DEBUG_TYPE "si-lower-control-flow"
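// SILowerControlFlow expands the AMDGPU structured control-flow pseudos
// (SI_IF, SI_ELSE, SI_IF_BREAK, SI_LOOP, SI_WATERFALL_LOOP, SI_END_CF) into
// real lane-mask arithmetic on exec (S_AND/S_OR/S_XOR and their terminator
// forms) plus S_CBRANCH_EXECZ / S_CBRANCH_EXECNZ branches. The comments that
// precede each excerpted function below are approximate summaries of what
// that function does.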
class SILowerControlFlow {
  bool EnableOptimizeEndCf = false;

    // From the skipToUncondBrOrEnd() helper: advance past instructions up to
    // the first unconditional branch (or the end of the block).
    while (I != End && !I->isUnconditionalBranch())

  void optimizeEndCf();

  SILowerControlFlow(const GCNSubtarget *ST, LiveIntervals *LIS,
                     LiveVariables *LV, MachineDominatorTree *MDT,
                     MachinePostDominatorTree *PDT)
      : LIS(LIS), LV(LV), MDT(MDT), PDT(PDT),
        LMC(AMDGPU::LaneMaskConstants::get(*ST)) {}
  bool run(MachineFunction &MF);
  SILowerControlFlowLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI Lower control flow pseudo instructions";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
char SILowerControlFlowLegacy::ID = 0;
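// The worklist loop below appears to come from the hasKill() helper, which
// walks successor blocks between two points of the CFG looking for blocks
// that contain kill terminators.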
  while (!Worklist.empty()) {
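// From isSimpleIf(): an SI_IF is "simple" when the mask it defines has exactly
// one non-debug use and that use is the matching SI_END_CF, so emitIf() can
// skip the XOR that computes the else mask.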
  Register SaveExecReg = MI.getOperand(0).getReg();
  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);

  if (U == MRI->use_instr_nodbg_end() ||
      std::next(U) != MRI->use_instr_nodbg_end() ||
      U->getOpcode() != AMDGPU::SI_END_CF)
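// emitIf() lowers an SI_IF pseudo. Roughly (wave64 opcodes shown for
// illustration; the real ones come from LMC, the lane-mask constants):
//   %copy     = COPY $exec
//   %lanes    = S_AND_B64 %copy, %cond     ; lanes that run the 'then' side
//   %saveexec = S_XOR_B64 %lanes, %copy    ; omitted in the "simple if" case
//   $exec     = S_MOV_B64_term %lanes
//   S_CBRANCH_EXECZ <else/join block>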
void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  Register SaveExecReg = MI.getOperand(0).getReg();
  MachineOperand &Cond = MI.getOperand(1);
  assert(Cond.getSubReg() == AMDGPU::NoSubRegister);

  MachineOperand &ImpDefSCC = MI.getOperand(4);

  auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);

  Register CopyReg = SimpleIf ? SaveExecReg
                              : MRI->createVirtualRegister(BoolRC);
  MachineInstr *CopyExec =
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)

  LoweredIf.insert(CopyReg);

  setImpSCCDefDead(*And, true);

  MachineInstr *Xor = nullptr;

    setImpSCCDefDead(*Xor, ImpDefSCC.isDead());

  MachineInstr *SetExec =

  I = skipToUncondBrOrEnd(MBB, I);

  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
                            .add(MI.getOperand(2));

    MI.eraseFromParent();

  MI.eraseFromParent();

  RecomputeRegs.insert(SaveExecReg);
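// emitElse() lowers SI_ELSE. Approximately:
//   %save = S_OR_SAVEEXEC_B64 <saved-if-mask>   ; at the top of the block
//   %dst  = S_AND_B64 $exec, %save
//   $exec = S_XOR_B64_term $exec, %dst          ; switch to the 'else' lanes
//   S_CBRANCH_EXECZ <flow/join block>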
void SILowerControlFlow::emitElse(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  Register SaveReg = MRI->createVirtualRegister(BoolRC);
  MachineInstr *OrSaveExec =

          .add(MI.getOperand(1));

  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();

  ElsePt = skipToUncondBrOrEnd(MBB, ElsePt);

    MI.eraseFromParent();

  MI.eraseFromParent();

  RecomputeRegs.insert(SrcReg);
  RecomputeRegs.insert(DstReg);
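// emitIfBreak() lowers SI_IF_BREAK: the new break mask is roughly
// (<cond> & $exec) | <existing break mask>. The AND with exec is skipped
// (SkipAnding) when the condition was produced in the same block by an
// instruction that is already masked by exec, such as a VALU compare.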
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  auto Dst = MI.getOperand(0).getReg();

  bool SkipAnding = false;
  if (MI.getOperand(1).isReg()) {
    if (MachineInstr *Def = MRI->getUniqueVRegDef(MI.getOperand(1).getReg())) {
      SkipAnding = Def->getParent() == MI.getParent()

  MachineInstr *And = nullptr, *Or = nullptr;

    AndReg = MRI->createVirtualRegister(BoolRC);

             .add(MI.getOperand(1));

             .add(MI.getOperand(2));

             .add(MI.getOperand(1))
             .add(MI.getOperand(2));

    RecomputeRegs.insert(And->getOperand(2).getReg());

  MI.eraseFromParent();
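// emitLoop() lowers SI_LOOP at the bottom of a loop: clear the "broken" lanes
// from exec and branch back while any lanes remain active. Roughly:
//   $exec = S_ANDN2_B64_term $exec, <break mask>
//   S_CBRANCH_EXECNZ <loop header>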
void SILowerControlFlow::emitLoop(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  MachineInstr *AndN2 =

          .add(MI.getOperand(0));

  auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator());

          .add(MI.getOperand(1));

  RecomputeRegs.insert(MI.getOperand(0).getReg());

  MI.eraseFromParent();
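// skipIgnoreExecInstsTrivialSucc() walks forward from a point, skipping
// instructions that do not read exec and following blocks that have a single
// successor, to reach the next instruction whose behaviour depends on the
// exec mask. optimizeEndCf() uses it to spot back-to-back exec restores.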
SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(

  SmallPtrSet<const MachineBasicBlock *, 4> Visited;
  MachineBasicBlock *B = &MBB;

    for ( ; It != E; ++It) {
      if (TII->mayReadEXEC(*MRI, *It))

    if (B->succ_size() != 1)

    MachineBasicBlock *Succ = *B->succ_begin();
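// emitEndCf() lowers SI_END_CF by restoring the lanes that were masked off:
//   $exec = S_OR_B64 $exec, <saved mask>
// The restore is normally placed at the start of the block; if an earlier
// instruction in the block writes the saved-mask register, the block is split
// at the SI_END_CF and a terminator form of the OR is emitted instead, with
// the dominator and post-dominator trees updated for the new edge.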
MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  bool NeedBlockSplit = false;

    if (I->modifiesRegister(DataReg, TRI)) {
      NeedBlockSplit = true;

  unsigned Opcode = LMC.OrOpc;
  MachineBasicBlock *SplitBB = &MBB;
  if (NeedBlockSplit) {

    if (SplitBB != &MBB && (MDT || PDT)) {

      for (MachineBasicBlock *Succ : SplitBB->successors()) {
        DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});

          .add(MI.getOperand(0));

  if (SplitBB != &MBB) {

    DenseSet<Register> DefInOrigBlock;

    for (MachineBasicBlock *BlockPiece : {&MBB, SplitBB}) {
      for (MachineInstr &X : *BlockPiece) {
        for (MachineOperand &Op : X.all_defs()) {
          if (Op.getReg().isVirtual())

    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {

      for (MachineInstr *Kill : VI.Kills) {

  LoweredEndCf.insert(NewMI);

  MI.eraseFromParent();
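// findMaskOperands() is a helper for combineMasks(): for operand OpNo of an
// S_AND/S_OR mask instruction it collects either the operand itself or, when
// that operand's unique def is a full copy or the same opcode in the same
// block (with exec not clobbered in between), the source operands of that def.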
void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
                                          SmallVectorImpl<MachineOperand> &Src) const {
  MachineOperand &Op = MI.getOperand(OpNo);
  if (!Op.isReg() || !Op.getReg().isVirtual()) {

  MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
  if (!Def || Def->getParent() != MI.getParent() ||
      !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))

  for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
    if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
        !(I->isCopy() && I->getOperand(0).getReg() != LMC.ExecReg))

  for (const auto &SrcOp : Def->explicit_operands())
    if (SrcOp.isReg() && SrcOp.isUse() &&
        (SrcOp.getReg().isVirtual() || SrcOp.getReg() == LMC.ExecReg))
      Src.push_back(SrcOp);
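// combineMasks() simplifies chained exec-mask computations left behind by the
// lowering, folding patterns of the shape (A op B) op A into A op B for
// S_AND/S_OR, and erasing the feeding instruction once it becomes dead.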
void SILowerControlFlow::combineMasks(MachineInstr &MI) {
  assert(MI.getNumExplicitOperands() == 3);

  unsigned OpToReplace = 1;
  findMaskOperands(MI, 1, Ops);
  if (Ops.size() == 1) OpToReplace = 2;
  findMaskOperands(MI, 2, Ops);
  if (Ops.size() != 3) return;

  unsigned UniqueOpndIdx;
  if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
  else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
  else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;

  MI.removeOperand(OpToReplace);
  MI.addOperand(Ops[UniqueOpndIdx]);

    MRI->getUniqueVRegDef(Reg)->eraseFromParent();
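// optimizeEndCf() erases redundant exec restores: if the S_OR emitted for an
// inner SI_END_CF is followed (ignoring exec-independent instructions) by the
// restore of an enclosing construct whose saved mask came from a lowered
// SI_IF, the inner restore has no effect and is removed. This only runs when
// optimizations are enabled and amdgpu-remove-redundant-endcf is set.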
void SILowerControlFlow::optimizeEndCf() {

  if (!EnableOptimizeEndCf)

  for (MachineInstr *MI : reverse(LoweredEndCf)) {
    MachineBasicBlock &MBB = *MI->getParent();

        skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));

          = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();

      const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
      if (Def && LoweredIf.count(SavedExec)) {

          Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
        MI->eraseFromParent();

        removeMBBifRedundant(MBB);
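// process() dispatches a single control-flow pseudo to the matching emit*
// helper (SI_IF -> emitIf, SI_ELSE -> emitElse, and so on), rewrites
// SI_WATERFALL_LOOP directly into S_CBRANCH_EXECNZ, and afterwards tries
// combineMasks() on neighbouring S_AND/S_OR mask instructions.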
MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();

  MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;

  MachineBasicBlock *SplitBB = &MBB;

  switch (MI.getOpcode()) {

  case AMDGPU::SI_ELSE:

  case AMDGPU::SI_IF_BREAK:

  case AMDGPU::SI_LOOP:

  case AMDGPU::SI_WATERFALL_LOOP:
    MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));

  case AMDGPU::SI_END_CF:
    SplitBB = emitEndCf(MI);

    assert(false && "Attempt to process unsupported instruction");

    MachineInstr &MaskMI = *I;

    case AMDGPU::S_AND_B64:
    case AMDGPU::S_OR_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_OR_B32:

      combineMasks(MaskMI);
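// removeMBBifRedundant() deletes a block that contains nothing but debug
// instructions and an unconditional branch (typically after an end-cf restore
// was erased), re-pointing predecessors at its successor, inserting an
// explicit branch where fallthrough would otherwise break, and updating the
// dominator and post-dominator trees.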
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {

    if (!I.isDebugInstr() && !I.isUnconditionalBranch())

  MachineBasicBlock *FallThrough = nullptr;

    if (P->getFallThrough(false) == &MBB)

      DTUpdates.push_back({DomTreeT::Insert, P, Succ});

    MachineInstr *BranchMI = BuildMI(*FallThrough, FallThrough->end(),
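// run() is the driver. It first records which blocks contain kill terminators
// (and SI_DEMOTE_I1 where demotion is possible), then walks every block and
// lowers each control-flow pseudo via process(), and finally calls
// optimizeEndCf() and recomputes liveness for the registers collected in
// RecomputeRegs.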
bool SILowerControlFlow::run(MachineFunction &MF) {

  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  BoolRC = TRI->getBoolRC();

  const bool CanDemote =

  for (auto &MBB : MF) {
    bool IsKillBlock = false;

      if (TII->isKillTerminator(Term.getOpcode())) {

    if (CanDemote && !IsKillBlock) {
      for (auto &MI : MBB) {
        if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {

       BI != MF.end(); BI = NextBB) {
    NextBB = std::next(BI);
    MachineBasicBlock *MBB = &*BI;

      MachineInstr &MI = *I;
      MachineBasicBlock *SplitMBB = MBB;

      switch (MI.getOpcode()) {

      case AMDGPU::SI_ELSE:
      case AMDGPU::SI_IF_BREAK:
      case AMDGPU::SI_WATERFALL_LOOP:
      case AMDGPU::SI_LOOP:
      case AMDGPU::SI_END_CF:
        SplitMBB = process(MI);

      if (SplitMBB != MBB) {

  RecomputeRegs.clear();
  LoweredEndCf.clear();
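// The legacy-PM wrapper fetches whichever of LiveIntervals, LiveVariables,
// and the (post)dominator trees are already available (they are optional for
// this pass) and forwards them to the shared SILowerControlFlow
// implementation; the final line of the excerpt is the equivalent call made
// from the new-pass-manager path.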
bool SILowerControlFlowLegacy::runOnMachineFunction(MachineFunction &MF) {

  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

  auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
  LiveVariables *LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
  MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;

      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
  MachinePostDominatorTree *PDT =
      PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
  return SILowerControlFlow(ST, LIS, LV, MDT, PDT).run(MF);

  bool Changed = SILowerControlFlow(ST, LIS, LV, MDT, PDT).run(MF);