64#define DEBUG_TYPE "si-lower-control-flow"
72class SILowerControlFlow {
90 unsigned Andn2TermOpc;
93 unsigned OrSaveExecOpc;
96 bool EnableOptimizeEndCf =
false;
131 while (
I !=
End && !
I->isUnconditionalBranch())
137 void optimizeEndCf();
142 : LIS(LIS), LV(LV), MDT(MDT) {}
155 return "SI Lower control flow pseudo instructions";
171char SILowerControlFlowLegacy::ID = 0;
190 while (!Worklist.
empty()) {
205 Register SaveExecReg =
MI.getOperand(0).getReg();
206 auto U =
MRI->use_instr_nodbg_begin(SaveExecReg);
208 if (U ==
MRI->use_instr_nodbg_end() ||
209 std::next(U) !=
MRI->use_instr_nodbg_end() ||
210 U->getOpcode() != AMDGPU::SI_END_CF)
220 Register SaveExecReg =
MI.getOperand(0).getReg();
222 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
235 auto UseMI =
MRI->use_instr_nodbg_begin(SaveExecReg);
241 Register CopyReg = SimpleIf ? SaveExecReg
242 :
MRI->createVirtualRegister(BoolRC);
247 LoweredIf.
insert(CopyReg);
258 setImpSCCDefDead(*
And,
true);
266 setImpSCCDefDead(*
Xor, ImpDefSCC.
isDead());
279 I = skipToUncondBrOrEnd(
MBB,
I);
284 .
add(
MI.getOperand(2));
287 MI.eraseFromParent();
303 MI.eraseFromParent();
308 RecomputeRegs.
insert(SaveExecReg);
325 Register SaveReg =
MRI->createVirtualRegister(BoolRC);
328 .
add(
MI.getOperand(1));
349 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
356 MI.eraseFromParent();
361 MI.eraseFromParent();
369 RecomputeRegs.
insert(SrcReg);
370 RecomputeRegs.
insert(DstReg);
380 auto Dst =
MI.getOperand(0).getReg();
386 bool SkipAnding =
false;
387 if (
MI.getOperand(1).isReg()) {
389 SkipAnding =
Def->getParent() ==
MI.getParent()
399 AndReg =
MRI->createVirtualRegister(BoolRC);
402 .
add(
MI.getOperand(1));
407 .
add(
MI.getOperand(2));
410 .
add(
MI.getOperand(1))
411 .
add(
MI.getOperand(2));
422 RecomputeRegs.
insert(
And->getOperand(2).getReg());
428 MI.eraseFromParent();
438 .
add(
MI.getOperand(0));
442 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
445 .
add(
MI.getOperand(1));
448 RecomputeRegs.
insert(
MI.getOperand(0).getReg());
453 MI.eraseFromParent();
457SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
467 for ( ; It != E; ++It) {
468 if (
TII->mayReadEXEC(*
MRI, *It))
475 if (
B->succ_size() != 1)
495 bool NeedBlockSplit =
false;
499 if (
I->modifiesRegister(DataReg,
TRI)) {
500 NeedBlockSplit =
true;
505 unsigned Opcode = OrOpc;
507 if (NeedBlockSplit) {
509 if (MDT && SplitBB != &
MBB) {
524 .
add(
MI.getOperand(0));
528 if (SplitBB != &
MBB) {
538 if (
Op.getReg().isVirtual())
544 for (
unsigned i = 0, e =
MRI->getNumVirtRegs(); i != e; ++i) {
552 if (
Kill->getParent() == SplitBB && !DefInOrigBlock.
contains(Reg))
560 LoweredEndCf.
insert(NewMI);
565 MI.eraseFromParent();
574void SILowerControlFlow::findMaskOperands(
MachineInstr &
MI,
unsigned OpNo,
577 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
583 if (!Def ||
Def->getParent() !=
MI.getParent() ||
584 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
590 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
591 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
592 !(
I->isCopy() &&
I->getOperand(0).getReg() != Exec))
595 for (
const auto &
SrcOp :
Def->explicit_operands())
598 Src.push_back(
SrcOp);
606 assert(
MI.getNumExplicitOperands() == 3);
608 unsigned OpToReplace = 1;
609 findMaskOperands(
MI, 1, Ops);
610 if (Ops.
size() == 1) OpToReplace = 2;
611 findMaskOperands(
MI, 2, Ops);
612 if (Ops.
size() != 3)
return;
614 unsigned UniqueOpndIdx;
615 if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
616 else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
617 else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
621 MI.removeOperand(OpToReplace);
622 MI.addOperand(Ops[UniqueOpndIdx]);
623 if (
MRI->use_empty(Reg))
624 MRI->getUniqueVRegDef(Reg)->eraseFromParent();
627void SILowerControlFlow::optimizeEndCf() {
630 if (!EnableOptimizeEndCf)
636 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
637 if (Next ==
MBB.
end() || !LoweredEndCf.
count(&*Next))
642 =
TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
646 if (Def && LoweredIf.
count(SavedExec)) {
652 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
653 MI->eraseFromParent();
656 removeMBBifRedundant(
MBB);
668 switch (
MI.getOpcode()) {
673 case AMDGPU::SI_ELSE:
677 case AMDGPU::SI_IF_BREAK:
681 case AMDGPU::SI_LOOP:
685 case AMDGPU::SI_WATERFALL_LOOP:
686 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
689 case AMDGPU::SI_END_CF:
690 SplitBB = emitEndCf(
MI);
694 assert(
false &&
"Attempt to process unsupported instruction");
703 case AMDGPU::S_AND_B64:
704 case AMDGPU::S_OR_B64:
705 case AMDGPU::S_AND_B32:
706 case AMDGPU::S_OR_B32:
708 combineMasks(MaskMI);
721 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
732 if (
P->getFallThrough(
false) == &
MBB)
734 P->ReplaceUsesOfBlockWith(&
MBB, Succ);
767 TII =
ST.getInstrInfo();
768 TRI = &
TII->getRegisterInfo();
773 BoolRC =
TRI->getBoolRC();
776 AndOpc = AMDGPU::S_AND_B32;
777 OrOpc = AMDGPU::S_OR_B32;
778 XorOpc = AMDGPU::S_XOR_B32;
779 MovTermOpc = AMDGPU::S_MOV_B32_term;
780 Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
781 XorTermrOpc = AMDGPU::S_XOR_B32_term;
782 OrTermrOpc = AMDGPU::S_OR_B32_term;
783 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
784 Exec = AMDGPU::EXEC_LO;
786 AndOpc = AMDGPU::S_AND_B64;
787 OrOpc = AMDGPU::S_OR_B64;
788 XorOpc = AMDGPU::S_XOR_B64;
789 MovTermOpc = AMDGPU::S_MOV_B64_term;
790 Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
791 XorTermrOpc = AMDGPU::S_XOR_B64_term;
792 OrTermrOpc = AMDGPU::S_OR_B64_term;
793 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
798 const bool CanDemote =
800 for (
auto &
MBB : MF) {
801 bool IsKillBlock =
false;
803 if (
TII->isKillTerminator(
Term.getOpcode())) {
809 if (CanDemote && !IsKillBlock) {
810 for (
auto &
MI :
MBB) {
811 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
819 bool Changed =
false;
822 BI != MF.end(); BI = NextBB) {
823 NextBB = std::next(BI);
833 switch (
MI.getOpcode()) {
835 case AMDGPU::SI_ELSE:
836 case AMDGPU::SI_IF_BREAK:
837 case AMDGPU::SI_WATERFALL_LOOP:
838 case AMDGPU::SI_LOOP:
839 case AMDGPU::SI_END_CF:
840 SplitMBB = process(
MI);
845 if (SplitMBB !=
MBB) {
846 MBB = Next->getParent();
855 for (
Register Reg : RecomputeRegs) {
861 RecomputeRegs.
clear();
862 LoweredEndCf.
clear();
869bool SILowerControlFlowLegacy::runOnMachineFunction(
MachineFunction &MF) {
871 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
872 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() :
nullptr;
874 auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
875 LiveVariables *LV = LVWrapper ? &LVWrapper->getLV() :
nullptr;
876 auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
878 return SILowerControlFlow(LIS, LV, MDT).run(MF);
889 bool Changed = SILowerControlFlow(LIS, LV, MDT).run(MF);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
This file defines the SmallSet class.
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an Operation in the Expression.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's immediate dominator changes.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that dominates all uses.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
succ_iterator succ_begin()
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this block exits by falling through, control will transfer to the specified MBB.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
iterator_range< iterator > terminators()
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
void setIsDead(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static bool isVALU(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
char & SILowerControlFlowLegacyID
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
VarInfo - This represents the regions where a virtual register is live in the program.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in their MBB.