67#define DEBUG_TYPE "si-lower-control-flow"
75class SILowerControlFlow {
92 bool EnableOptimizeEndCf =
false;
127 while (
I != End && !
I->isUnconditionalBranch())
133 void optimizeEndCf();
136 SILowerControlFlow(
const GCNSubtarget *ST, LiveIntervals *LIS,
137 LiveVariables *LV, MachineDominatorTree *MDT,
138 MachinePostDominatorTree *PDT)
139 : LIS(LIS), LV(LV), MDT(MDT), PDT(PDT),
140 LMC(AMDGPU::LaneMaskConstants::
get(*
ST)) {}
141 bool run(MachineFunction &MF);
148 SILowerControlFlowLegacy() : MachineFunctionPass(ID) {}
150 bool runOnMachineFunction(MachineFunction &MF)
override;
152 StringRef getPassName()
const override {
153 return "SI Lower control flow pseudo instructions";
156 void getAnalysisUsage(AnalysisUsage &AU)
const override {
164 AU.
addPreserved<MachineBlockFrequencyInfoWrapperPass>();
171char SILowerControlFlowLegacy::ID = 0;
190 while (!Worklist.
empty()) {
205 Register SaveExecReg =
MI.getOperand(0).getReg();
206 auto U =
MRI->use_instr_nodbg_begin(SaveExecReg);
208 if (U ==
MRI->use_instr_nodbg_end() ||
209 std::next(U) !=
MRI->use_instr_nodbg_end() ||
210 U->getOpcode() != AMDGPU::SI_END_CF)
216void SILowerControlFlow::emitIf(MachineInstr &
MI) {
217 MachineBasicBlock &
MBB = *
MI.getParent();
220 Register SaveExecReg =
MI.getOperand(0).getReg();
221 MachineOperand&
Cond =
MI.getOperand(1);
222 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
224 MachineOperand &ImpDefSCC =
MI.getOperand(4);
235 auto UseMI =
MRI->use_instr_nodbg_begin(SaveExecReg);
241 Register CopyReg = SimpleIf ? SaveExecReg
242 :
MRI->createVirtualRegister(BoolRC);
243 MachineInstr *CopyExec =
BuildMI(
MBB,
I,
DL,
TII->get(AMDGPU::COPY), CopyReg)
246 LoweredIf.
insert(CopyReg);
255 setImpSCCDefDead(*
And,
true);
257 MachineInstr *
Xor =
nullptr;
262 setImpSCCDefDead(*
Xor, ImpDefSCC.
isDead());
267 MachineInstr *SetExec =
275 I = skipToUncondBrOrEnd(
MBB,
I);
279 MachineInstr *NewBr =
BuildMI(
MBB,
I,
DL,
TII->get(AMDGPU::S_CBRANCH_EXECZ))
280 .
add(
MI.getOperand(2));
283 MI.eraseFromParent();
298 MI.eraseFromParent();
303 RecomputeRegs.
insert(SaveExecReg);
309void SILowerControlFlow::emitElse(MachineInstr &
MI) {
310 MachineBasicBlock &
MBB = *
MI.getParent();
320 Register SaveReg =
MRI->createVirtualRegister(BoolRC);
321 MachineInstr *OrSaveExec =
323 .
add(
MI.getOperand(1));
327 MachineBasicBlock *DestBB =
MI.getOperand(2).getMBB();
344 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
351 MI.eraseFromParent();
356 MI.eraseFromParent();
364 RecomputeRegs.
insert(SrcReg);
365 RecomputeRegs.
insert(DstReg);
369void SILowerControlFlow::emitIfBreak(MachineInstr &
MI) {
370 MachineBasicBlock &
MBB = *
MI.getParent();
372 auto Dst =
MI.getOperand(0).getReg();
378 bool SkipAnding =
false;
379 if (
MI.getOperand(1).isReg()) {
380 if (MachineInstr *Def =
MRI->getUniqueVRegDef(
MI.getOperand(1).getReg())) {
381 SkipAnding =
Def->getParent() ==
MI.getParent()
388 MachineInstr *
And =
nullptr, *
Or =
nullptr;
391 AndReg =
MRI->createVirtualRegister(BoolRC);
394 .
add(
MI.getOperand(1));
399 .
add(
MI.getOperand(2));
402 .
add(
MI.getOperand(1))
403 .
add(
MI.getOperand(2));
414 RecomputeRegs.
insert(
And->getOperand(2).getReg());
420 MI.eraseFromParent();
423void SILowerControlFlow::emitLoop(MachineInstr &
MI) {
424 MachineBasicBlock &
MBB = *
MI.getParent();
427 MachineInstr *AndN2 =
430 .
add(
MI.getOperand(0));
434 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
437 .
add(
MI.getOperand(1));
440 RecomputeRegs.
insert(
MI.getOperand(0).getReg());
445 MI.eraseFromParent();
449SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
452 SmallPtrSet<const MachineBasicBlock *, 4> Visited;
453 MachineBasicBlock *
B = &
MBB;
459 for ( ; It !=
E; ++It) {
460 if (
TII->mayReadEXEC(*
MRI, *It))
467 if (
B->succ_size() != 1)
471 MachineBasicBlock *Succ = *
B->succ_begin();
478MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &
MI) {
479 MachineBasicBlock &
MBB = *
MI.getParent();
487 bool NeedBlockSplit =
false;
491 if (
I->modifiesRegister(DataReg,
TRI)) {
492 NeedBlockSplit =
true;
497 unsigned Opcode = LMC.
OrOpc;
498 MachineBasicBlock *SplitBB = &
MBB;
499 if (NeedBlockSplit) {
501 if (SplitBB != &
MBB && (MDT || PDT)) {
504 for (MachineBasicBlock *Succ : SplitBB->
successors()) {
505 DTUpdates.
push_back({DomTreeT::Insert, SplitBB, Succ});
520 .
add(
MI.getOperand(0));
524 if (SplitBB != &
MBB) {
529 DenseSet<Register> DefInOrigBlock;
531 for (MachineBasicBlock *BlockPiece : {&
MBB, SplitBB}) {
532 for (MachineInstr &
X : *BlockPiece) {
533 for (MachineOperand &
Op :
X.all_defs()) {
534 if (
Op.getReg().isVirtual())
540 for (
unsigned i = 0, e =
MRI->getNumVirtRegs(); i != e; ++i) {
547 for (MachineInstr *Kill :
VI.Kills) {
556 LoweredEndCf.
insert(NewMI);
561 MI.eraseFromParent();
570void SILowerControlFlow::findMaskOperands(MachineInstr &
MI,
unsigned OpNo,
571 SmallVectorImpl<MachineOperand> &Src)
const {
572 MachineOperand &
Op =
MI.getOperand(OpNo);
573 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
578 MachineInstr *
Def =
MRI->getUniqueVRegDef(
Op.getReg());
579 if (!Def ||
Def->getParent() !=
MI.getParent() ||
580 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
586 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
587 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
588 !(
I->isCopy() &&
I->getOperand(0).getReg() != LMC.
ExecReg))
591 for (
const auto &SrcOp :
Def->explicit_operands())
592 if (SrcOp.isReg() && SrcOp.isUse() &&
593 (SrcOp.getReg().isVirtual() || SrcOp.getReg() == LMC.
ExecReg))
594 Src.push_back(SrcOp);
601void SILowerControlFlow::combineMasks(MachineInstr &
MI) {
602 assert(
MI.getNumExplicitOperands() == 3);
604 unsigned OpToReplace = 1;
605 findMaskOperands(
MI, 1,
Ops);
606 if (
Ops.size() == 1) OpToReplace = 2;
607 findMaskOperands(
MI, 2,
Ops);
608 if (
Ops.size() != 3)
return;
610 unsigned UniqueOpndIdx;
611 if (
Ops[0].isIdenticalTo(
Ops[1])) UniqueOpndIdx = 2;
612 else if (
Ops[0].isIdenticalTo(
Ops[2])) UniqueOpndIdx = 1;
613 else if (
Ops[1].isIdenticalTo(
Ops[2])) UniqueOpndIdx = 1;
617 MI.removeOperand(OpToReplace);
618 MI.addOperand(
Ops[UniqueOpndIdx]);
620 MRI->getUniqueVRegDef(
Reg)->eraseFromParent();
623void SILowerControlFlow::optimizeEndCf() {
626 if (!EnableOptimizeEndCf)
629 for (MachineInstr *
MI :
reverse(LoweredEndCf)) {
630 MachineBasicBlock &
MBB = *
MI->getParent();
632 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
638 =
TII->getNamedOperand(*
Next, AMDGPU::OpName::src1)->getReg();
641 const MachineInstr *
Def =
MRI->getUniqueVRegDef(SavedExec);
642 if (Def && LoweredIf.
count(SavedExec)) {
648 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
649 MI->eraseFromParent();
652 removeMBBifRedundant(
MBB);
657MachineBasicBlock *SILowerControlFlow::process(MachineInstr &
MI) {
658 MachineBasicBlock &
MBB = *
MI.getParent();
660 MachineInstr *Prev = (
I !=
MBB.
begin()) ? &*(std::prev(
I)) : nullptr;
662 MachineBasicBlock *SplitBB = &
MBB;
664 switch (
MI.getOpcode()) {
669 case AMDGPU::SI_ELSE:
673 case AMDGPU::SI_IF_BREAK:
677 case AMDGPU::SI_LOOP:
681 case AMDGPU::SI_WATERFALL_LOOP:
682 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
685 case AMDGPU::SI_END_CF:
686 SplitBB = emitEndCf(
MI);
690 assert(
false &&
"Attempt to process unsupported instruction");
697 MachineInstr &MaskMI = *
I;
699 case AMDGPU::S_AND_B64:
700 case AMDGPU::S_OR_B64:
701 case AMDGPU::S_AND_B32:
702 case AMDGPU::S_OR_B32:
704 combineMasks(MaskMI);
715bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &
MBB) {
717 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
724 MachineBasicBlock *FallThrough =
nullptr;
731 if (
P->getFallThrough(
false) == &
MBB)
734 DTUpdates.
push_back({DomTreeT::Insert,
P, Succ});
752 MachineInstr *BranchMI =
BuildMI(*FallThrough, FallThrough->
end(),
762bool SILowerControlFlow::run(MachineFunction &MF) {
764 TII =
ST.getInstrInfo();
770 BoolRC =
TRI->getBoolRC();
773 const bool CanDemote =
775 for (
auto &
MBB : MF) {
776 bool IsKillBlock =
false;
778 if (
TII->isKillTerminator(
Term.getOpcode())) {
784 if (CanDemote && !IsKillBlock) {
785 for (
auto &
MI :
MBB) {
786 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
797 BI != MF.end(); BI = NextBB) {
798 NextBB = std::next(BI);
799 MachineBasicBlock *
MBB = &*BI;
805 MachineInstr &
MI = *
I;
806 MachineBasicBlock *SplitMBB =
MBB;
808 switch (
MI.getOpcode()) {
810 case AMDGPU::SI_ELSE:
811 case AMDGPU::SI_IF_BREAK:
812 case AMDGPU::SI_WATERFALL_LOOP:
813 case AMDGPU::SI_LOOP:
814 case AMDGPU::SI_END_CF:
815 SplitMBB = process(
MI);
820 if (SplitMBB !=
MBB) {
839 RecomputeRegs.clear();
840 LoweredEndCf.
clear();
847bool SILowerControlFlowLegacy::runOnMachineFunction(MachineFunction &MF) {
850 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
851 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() :
nullptr;
853 auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
854 LiveVariables *LV = LVWrapper ? &LVWrapper->getLV() :
nullptr;
855 auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
856 MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() :
nullptr;
858 getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
859 MachinePostDominatorTree *PDT =
860 PDTWrapper ? &PDTWrapper->getPostDomTree() :
nullptr;
861 return SILowerControlFlow(ST, LIS, LV, MDT, PDT).run(MF);
875 bool Changed = SILowerControlFlow(ST, LIS, LV, MDT, PDT).run(MF);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
const unsigned XorTermOpc
const unsigned MovTermOpc
const unsigned OrSaveExecOpc
const unsigned AndN2TermOpc
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Implements a dense probed hash-table based set.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
const HexagonRegisterInfo & getRegisterInfo() const
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
LLVM_ABI void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
LLVM_ABI void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
iterator_range< iterator > terminators()
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
void setIsDead(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static bool isVALU(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
@ Kill
The last use of a register.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
DominatorTreeBase< T, false > DomTreeBase
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
char & SILowerControlFlowLegacyID
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...