Go to the documentation of this file.
63 #define DEBUG_TYPE "si-lower-control-flow"
88 unsigned Andn2TermOpc;
91 unsigned OrSaveExecOpc;
94 bool EnableOptimizeEndCf =
false;
131 while (
I != End && !
I->isUnconditionalBranch())
137 void optimizeEndCf();
147 return "SI Lower control flow pseudo instructions";
165 "SI lower control flow",
false,
false)
181 while (!Worklist.empty()) {
196 Register SaveExecReg =
MI.getOperand(0).getReg();
211 Register SaveExecReg =
MI.getOperand(0).getReg();
213 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
232 Register CopyReg = SimpleIf ? SaveExecReg
238 LoweredIf.
insert(CopyReg);
249 setImpSCCDefDead(*And,
true);
257 setImpSCCDefDead(*Xor, ImpDefSCC.
isDead());
270 I = skipToUncondBrOrEnd(
MBB,
I);
275 .
add(
MI.getOperand(2));
278 MI.eraseFromParent();
294 MI.eraseFromParent();
319 .
add(
MI.getOperand(1));
343 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
350 MI.eraseFromParent();
355 MI.eraseFromParent();
373 auto Dst =
MI.getOperand(0).getReg();
379 bool SkipAnding =
false;
380 if (
MI.getOperand(1).isReg()) {
382 SkipAnding =
Def->getParent() ==
MI.getParent()
394 .
add(
MI.getOperand(1));
399 .
add(
MI.getOperand(2));
404 .
add(
MI.getOperand(1))
405 .
add(
MI.getOperand(2));
418 MI.eraseFromParent();
428 .
add(
MI.getOperand(0));
430 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
433 .
add(
MI.getOperand(1));
440 MI.eraseFromParent();
444 SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
454 for ( ; It !=
E; ++It) {
455 if (
TII->mayReadEXEC(*
MRI, *It))
462 if (
B->succ_size() != 1)
482 bool NeedBlockSplit =
false;
486 if (
I->modifiesRegister(DataReg,
TRI)) {
487 NeedBlockSplit =
true;
492 unsigned Opcode = OrOpc;
494 if (NeedBlockSplit) {
496 if (MDT && SplitBB != &
MBB) {
511 .
add(
MI.getOperand(0));
515 if (SplitBB != &
MBB) {
521 if (
Op.isReg() &&
Op.isDef() &&
Op.getReg().isVirtual())
531 VI.AliveBlocks.set(SplitBB->getNumber());
542 LoweredEndCf.
insert(NewMI);
547 MI.eraseFromParent();
556 void SILowerControlFlow::findMaskOperands(
MachineInstr &
MI,
unsigned OpNo,
559 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
565 if (!
Def ||
Def->getParent() !=
MI.getParent() ||
566 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
572 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
573 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
574 !(
I->isCopy() &&
I->getOperand(0).getReg() != Exec))
577 for (
const auto &
SrcOp :
Def->explicit_operands())
580 Src.push_back(
SrcOp);
588 assert(
MI.getNumExplicitOperands() == 3);
590 unsigned OpToReplace = 1;
591 findMaskOperands(
MI, 1, Ops);
592 if (Ops.size() == 1) OpToReplace = 2;
593 findMaskOperands(
MI, 2, Ops);
594 if (Ops.size() != 3)
return;
596 unsigned UniqueOpndIdx;
597 if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
598 else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
599 else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
603 MI.removeOperand(OpToReplace);
604 MI.addOperand(Ops[UniqueOpndIdx]);
609 void SILowerControlFlow::optimizeEndCf() {
612 if (!EnableOptimizeEndCf)
618 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
619 if (Next ==
MBB.
end() || !LoweredEndCf.
count(&*Next))
624 =
TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
628 if (
Def && LoweredIf.
count(SavedExec)) {
634 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
635 MI->eraseFromParent();
638 removeMBBifRedundant(
MBB);
650 switch (
MI.getOpcode()) {
655 case AMDGPU::SI_ELSE:
659 case AMDGPU::SI_IF_BREAK:
663 case AMDGPU::SI_LOOP:
667 case AMDGPU::SI_WATERFALL_LOOP:
668 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
671 case AMDGPU::SI_END_CF:
672 SplitBB = emitEndCf(
MI);
676 assert(
false &&
"Attempt to process unsupported instruction");
685 case AMDGPU::S_AND_B64:
686 case AMDGPU::S_OR_B64:
687 case AMDGPU::S_AND_B32:
688 case AMDGPU::S_OR_B32:
690 combineMasks(MaskMI);
705 bool IsWave32 =
ST.isWave32();
707 if (
MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
710 TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
714 MI.eraseFromParent();
725 Register InputReg =
MI.getOperand(0).getReg();
731 if (DefInstr != FirstMI) {
750 auto BfeMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_BFE_U32), CountReg)
757 TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
760 auto CmpMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_CMP_EQ_U32))
767 TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
772 MI.eraseFromParent();
777 MI.eraseFromParent();
791 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
802 if (
P->getFallThrough() == &
MBB)
804 P->ReplaceUsesOfBlockWith(&
MBB, Succ);
826 MF->
splice(std::next(FallThroughPos), Succ);
838 TII =
ST.getInstrInfo();
839 TRI = &
TII->getRegisterInfo();
840 EnableOptimizeEndCf =
844 LIS = getAnalysisIfAvailable<LiveIntervals>();
846 LV = getAnalysisIfAvailable<LiveVariables>();
847 MDT = getAnalysisIfAvailable<MachineDominatorTree>();
849 BoolRC =
TRI->getBoolRC();
852 AndOpc = AMDGPU::S_AND_B32;
853 OrOpc = AMDGPU::S_OR_B32;
854 XorOpc = AMDGPU::S_XOR_B32;
855 MovTermOpc = AMDGPU::S_MOV_B32_term;
856 Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
857 XorTermrOpc = AMDGPU::S_XOR_B32_term;
858 OrTermrOpc = AMDGPU::S_OR_B32_term;
859 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
860 Exec = AMDGPU::EXEC_LO;
862 AndOpc = AMDGPU::S_AND_B64;
863 OrOpc = AMDGPU::S_OR_B64;
864 XorOpc = AMDGPU::S_XOR_B64;
865 MovTermOpc = AMDGPU::S_MOV_B64_term;
866 Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
867 XorTermrOpc = AMDGPU::S_XOR_B64_term;
868 OrTermrOpc = AMDGPU::S_OR_B64_term;
869 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
874 const bool CanDemote =
876 for (
auto &
MBB : MF) {
877 bool IsKillBlock =
false;
879 if (
TII->isKillTerminator(
Term.getOpcode())) {
885 if (CanDemote && !IsKillBlock) {
886 for (
auto &
MI :
MBB) {
887 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
895 bool Changed =
false;
898 BI != MF.end(); BI = NextBB) {
899 NextBB = std::next(BI);
909 switch (
MI.getOpcode()) {
911 case AMDGPU::SI_ELSE:
912 case AMDGPU::SI_IF_BREAK:
913 case AMDGPU::SI_WATERFALL_LOOP:
914 case AMDGPU::SI_LOOP:
915 case AMDGPU::SI_END_CF:
916 SplitMBB = process(
MI);
921 case AMDGPU::SI_INIT_EXEC:
922 case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
923 lowerInitExec(
MBB,
MI);
933 if (SplitMBB !=
MBB) {
934 MBB = Next->getParent();
942 LoweredEndCf.
clear();
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
unsigned succ_size() const
pred_iterator pred_begin()
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This is an optimization pass for GlobalISel generic memory operations.
static use_instr_nodbg_iterator use_instr_nodbg_end()
MachineInstrBuilder & UseMI
@ Or
Bitwise or logical OR of integers.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MachineInstrBuilder & add(const MachineOperand &MO) const
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Reg
All possible values of the reg field in the ModR/M byte.
INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE, "SI lower control flow", false, false) static void setImpSCCDefDead(MachineInstr &MI
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
void removeInterval(Register Reg)
Interval removal.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
char & SILowerControlFlowID
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
LLVM_NODISCARD T pop_back_val()
iterator_range< iterator > terminators()
DomTreeNodeBase * getIDom() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
unsigned const TargetRegisterInfo * TRI
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
@ And
Bitwise or logical AND of integers.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
VarInfo - This represents the regions where a virtual register is live in the program.
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Represent the analysis usage information of a pass.
@ Kill
The last use of a register.
MachineInstr * removeFromParent()
Unlink 'this' from the containing basic block, and return it without deleting it.
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
const HexagonInstrInfo * TII
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineOperand class - Representation of each machine instruction operand.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Implements a dense probed hash-table based set.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void setIsDead(bool Val=true)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
Representation of each machine instruction.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
void splice(iterator InsertPt, iterator MBBI)
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
initializer< Ty > init(const Ty &Val)
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
iterator_range< pred_iterator > predecessors()
bool insert(const value_type &X)
Insert a new element into the SetVector.
void eraseNode(MachineBasicBlock *BB)
eraseNode - Removes a node from the dominator tree.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
SmallVector< MachineOperand, 4 > Cond
iterator_range< succ_iterator > successors()
StringRef - Represent a constant reference to a string, i.e.
AnalysisUsage & addPreservedID(const void *ID)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
self_iterator getIterator()
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
const MachineBasicBlock * getParent() const
Base class for the actual dominator tree node.
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
void clear()
Completely clear the SetVector.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Iterator for intrusive lists based on ilist_node.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVALU(const MachineInstr &MI)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
constexpr char WavefrontSize[]
Key for Kernel::CodeProps::Metadata::mWavefrontSize.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
A vector that has set insertion semantics.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
@ Xor
Bitwise or logical XOR of integers.
bool contains(const T &V) const
Check if the SmallSet contains the given element.