63#define DEBUG_TYPE "si-lower-control-flow"
89 unsigned Andn2TermOpc;
92 unsigned OrSaveExecOpc;
95 bool EnableOptimizeEndCf =
false;
130 while (
I !=
End && !
I->isUnconditionalBranch())
136 void optimizeEndCf();
146 return "SI Lower control flow pseudo instructions";
162char SILowerControlFlow::ID = 0;
165 "SI lower control flow",
false,
false)
181 while (!Worklist.
empty()) {
196 Register SaveExecReg =
MI.getOperand(0).getReg();
197 auto U =
MRI->use_instr_nodbg_begin(SaveExecReg);
199 if (U ==
MRI->use_instr_nodbg_end() ||
200 std::next(U) !=
MRI->use_instr_nodbg_end() ||
201 U->getOpcode() != AMDGPU::SI_END_CF)
211 Register SaveExecReg =
MI.getOperand(0).getReg();
213 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
226 auto UseMI =
MRI->use_instr_nodbg_begin(SaveExecReg);
232 Register CopyReg = SimpleIf ? SaveExecReg
233 :
MRI->createVirtualRegister(BoolRC);
238 LoweredIf.
insert(CopyReg);
249 setImpSCCDefDead(*
And,
true);
257 setImpSCCDefDead(*
Xor, ImpDefSCC.
isDead());
270 I = skipToUncondBrOrEnd(
MBB,
I);
275 .
add(
MI.getOperand(2));
278 MI.eraseFromParent();
294 MI.eraseFromParent();
299 RecomputeRegs.
insert(SaveExecReg);
316 Register SaveReg =
MRI->createVirtualRegister(BoolRC);
319 .
add(
MI.getOperand(1));
340 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
347 MI.eraseFromParent();
352 MI.eraseFromParent();
360 RecomputeRegs.
insert(SrcReg);
361 RecomputeRegs.
insert(DstReg);
371 auto Dst =
MI.getOperand(0).getReg();
377 bool SkipAnding =
false;
378 if (
MI.getOperand(1).isReg()) {
380 SkipAnding =
Def->getParent() ==
MI.getParent()
390 AndReg =
MRI->createVirtualRegister(BoolRC);
393 .
add(
MI.getOperand(1));
398 .
add(
MI.getOperand(2));
401 .
add(
MI.getOperand(1))
402 .
add(
MI.getOperand(2));
413 RecomputeRegs.
insert(
And->getOperand(2).getReg());
419 MI.eraseFromParent();
429 .
add(
MI.getOperand(0));
433 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
436 .
add(
MI.getOperand(1));
439 RecomputeRegs.
insert(
MI.getOperand(0).getReg());
444 MI.eraseFromParent();
448SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
458 for ( ; It != E; ++It) {
459 if (
TII->mayReadEXEC(*
MRI, *It))
466 if (
B->succ_size() != 1)
486 bool NeedBlockSplit =
false;
490 if (
I->modifiesRegister(DataReg,
TRI)) {
491 NeedBlockSplit =
true;
496 unsigned Opcode = OrOpc;
498 if (NeedBlockSplit) {
500 if (MDT && SplitBB != &
MBB) {
515 .
add(
MI.getOperand(0));
519 if (SplitBB != &
MBB) {
529 if (
Op.getReg().isVirtual())
535 for (
unsigned i = 0, e =
MRI->getNumVirtRegs(); i != e; ++i) {
543 if (
Kill->getParent() == SplitBB && !DefInOrigBlock.
contains(Reg))
551 LoweredEndCf.
insert(NewMI);
556 MI.eraseFromParent();
565void SILowerControlFlow::findMaskOperands(
MachineInstr &
MI,
unsigned OpNo,
568 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
574 if (!Def ||
Def->getParent() !=
MI.getParent() ||
575 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
581 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
582 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
583 !(
I->isCopy() &&
I->getOperand(0).getReg() != Exec))
586 for (
const auto &
SrcOp :
Def->explicit_operands())
589 Src.push_back(
SrcOp);
597 assert(
MI.getNumExplicitOperands() == 3);
599 unsigned OpToReplace = 1;
600 findMaskOperands(
MI, 1, Ops);
601 if (Ops.
size() == 1) OpToReplace = 2;
602 findMaskOperands(
MI, 2, Ops);
603 if (Ops.
size() != 3)
return;
605 unsigned UniqueOpndIdx;
606 if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
607 else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
608 else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
612 MI.removeOperand(OpToReplace);
613 MI.addOperand(Ops[UniqueOpndIdx]);
614 if (
MRI->use_empty(Reg))
615 MRI->getUniqueVRegDef(Reg)->eraseFromParent();
618void SILowerControlFlow::optimizeEndCf() {
621 if (!EnableOptimizeEndCf)
627 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
628 if (Next ==
MBB.
end() || !LoweredEndCf.
count(&*Next))
633 =
TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
637 if (Def && LoweredIf.
count(SavedExec)) {
643 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
644 MI->eraseFromParent();
647 removeMBBifRedundant(
MBB);
659 switch (
MI.getOpcode()) {
664 case AMDGPU::SI_ELSE:
668 case AMDGPU::SI_IF_BREAK:
672 case AMDGPU::SI_LOOP:
676 case AMDGPU::SI_WATERFALL_LOOP:
677 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
680 case AMDGPU::SI_END_CF:
681 SplitBB = emitEndCf(
MI);
685 assert(
false &&
"Attempt to process unsupported instruction");
694 case AMDGPU::S_AND_B64:
695 case AMDGPU::S_OR_B64:
696 case AMDGPU::S_AND_B32:
697 case AMDGPU::S_OR_B32:
699 combineMasks(MaskMI);
712 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
723 if (
P->getFallThrough(
false) == &
MBB)
725 P->ReplaceUsesOfBlockWith(&
MBB, Succ);
758 TII =
ST.getInstrInfo();
759 TRI = &
TII->getRegisterInfo();
764 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
765 LIS = LISWrapper ? &LISWrapper->getLIS() :
nullptr;
767 auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
768 LV = LVWrapper ? &LVWrapper->getLV() :
nullptr;
769 auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
770 MDT = MDTWrapper ? &MDTWrapper->getDomTree() :
nullptr;
772 BoolRC =
TRI->getBoolRC();
775 AndOpc = AMDGPU::S_AND_B32;
776 OrOpc = AMDGPU::S_OR_B32;
777 XorOpc = AMDGPU::S_XOR_B32;
778 MovTermOpc = AMDGPU::S_MOV_B32_term;
779 Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
780 XorTermrOpc = AMDGPU::S_XOR_B32_term;
781 OrTermrOpc = AMDGPU::S_OR_B32_term;
782 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
783 Exec = AMDGPU::EXEC_LO;
785 AndOpc = AMDGPU::S_AND_B64;
786 OrOpc = AMDGPU::S_OR_B64;
787 XorOpc = AMDGPU::S_XOR_B64;
788 MovTermOpc = AMDGPU::S_MOV_B64_term;
789 Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
790 XorTermrOpc = AMDGPU::S_XOR_B64_term;
791 OrTermrOpc = AMDGPU::S_OR_B64_term;
792 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
797 const bool CanDemote =
799 for (
auto &
MBB : MF) {
800 bool IsKillBlock =
false;
802 if (
TII->isKillTerminator(
Term.getOpcode())) {
808 if (CanDemote && !IsKillBlock) {
809 for (
auto &
MI :
MBB) {
810 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
818 bool Changed =
false;
821 BI != MF.end(); BI = NextBB) {
822 NextBB = std::next(BI);
832 switch (
MI.getOpcode()) {
834 case AMDGPU::SI_ELSE:
835 case AMDGPU::SI_IF_BREAK:
836 case AMDGPU::SI_WATERFALL_LOOP:
837 case AMDGPU::SI_LOOP:
838 case AMDGPU::SI_END_CF:
839 SplitMBB = process(
MI);
844 if (SplitMBB !=
MBB) {
845 MBB = Next->getParent();
854 for (
Register Reg : RecomputeRegs) {
860 RecomputeRegs.
clear();
861 LoweredEndCf.
clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
This file defines the SmallSet class.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an Operation in the Expression.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
iterator_range< iterator > terminators()
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void eraseNode(MachineBasicBlock *BB)
eraseNode - Removes a node from the dominator tree.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
void setIsDead(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static bool isVALU(const MachineInstr &MI)
A vector that has set insertion semantics.
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
char & SILowerControlFlowID
VarInfo - This represents the regions where a virtual register is live in the program.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...