23#define DEBUG_TYPE "si-pre-emit-peephole"
56 "SI peephole optimizations",
false,
false)
58char SIPreEmitPeephole::
ID = 0;
84 const bool IsWave32 =
ST.isWave32();
85 const unsigned CondReg =
TRI->getVCC();
86 const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
87 const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
88 const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
89 const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
93 bool ReadsCond =
false;
94 unsigned Threshold = 5;
95 for (++
A;
A != E; ++
A) {
98 if (
A->modifiesRegister(ExecReg,
TRI))
100 if (
A->modifiesRegister(CondReg,
TRI)) {
101 if (!
A->definesRegister(CondReg,
TRI) ||
102 (
A->getOpcode() !=
And &&
A->getOpcode() != AndN2))
106 ReadsCond |=
A->readsRegister(CondReg,
TRI);
114 TII->commuteInstruction(*
A);
117 if (Op1.
getReg() != ExecReg)
122 int64_t MaskValue = 0;
126 auto M = std::next(
A);
127 bool ReadsSreg =
false;
128 bool ModifiesExec =
false;
129 for (;
M != E; ++
M) {
130 if (
M->definesRegister(SReg,
TRI))
132 if (
M->modifiesRegister(SReg,
TRI))
134 ReadsSreg |=
M->readsRegister(SReg,
TRI);
135 ModifiesExec |=
M->modifiesRegister(ExecReg,
TRI);
143 if (
A->getOpcode() ==
And && SReg == CondReg && !ModifiesExec &&
145 A->eraseFromParent();
148 if (!
M->isMoveImmediate() || !
M->getOperand(1).isImm() ||
149 (
M->getOperand(1).getImm() != -1 &&
M->getOperand(1).getImm() != 0))
151 MaskValue =
M->getOperand(1).getImm();
154 if (!ReadsSreg && Op2.
isKill()) {
155 A->getOperand(2).ChangeToImmediate(MaskValue);
156 M->eraseFromParent();
158 }
else if (Op2.
isImm()) {
165 assert(MaskValue == 0 || MaskValue == -1);
166 if (
A->getOpcode() == AndN2)
167 MaskValue = ~MaskValue;
169 if (!ReadsCond &&
A->registerDefIsDead(AMDGPU::SCC,
nullptr)) {
170 if (!
MI.killsRegister(CondReg,
TRI)) {
172 if (MaskValue == 0) {
173 BuildMI(*
A->getParent(), *
A,
A->getDebugLoc(),
TII->get(Mov), CondReg)
176 BuildMI(*
A->getParent(), *
A,
A->getDebugLoc(),
TII->get(Mov), CondReg)
181 A->eraseFromParent();
184 bool IsVCCZ =
MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
185 if (SReg == ExecReg) {
188 MI.eraseFromParent();
191 MI.setDesc(
TII->get(AMDGPU::S_BRANCH));
192 }
else if (IsVCCZ && MaskValue == 0) {
203 Found =
Term.isIdenticalTo(
MI);
206 assert(Found &&
"conditional branch is not terminator");
209 assert(Dst.isMBB() &&
"destination is not basic block");
211 BranchMI->eraseFromParent();
219 MI.setDesc(
TII->get(AMDGPU::S_BRANCH));
220 }
else if (!IsVCCZ && MaskValue == 0) {
223 assert(Dst.isMBB() &&
"destination is not basic block");
224 MI.getParent()->removeSuccessor(Dst.getMBB());
225 MI.eraseFromParent();
227 }
else if (MaskValue == -1) {
230 TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
233 MI.removeOperand(
MI.findRegisterUseOperandIdx(CondReg,
TRI,
false ));
254 E =
MI.getIterator();
258 switch (
I->getOpcode()) {
259 case AMDGPU::S_SET_GPR_IDX_MODE:
261 case AMDGPU::S_SET_GPR_IDX_OFF:
266 if (
I->modifiesRegister(AMDGPU::M0,
TRI))
268 if (IdxReg &&
I->modifiesRegister(IdxReg,
TRI))
273 TRI->isVectorRegister(MRI, MO.getReg());
277 if (!IdxOn || !(
I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
278 I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
284 MI.eraseFromBundle();
286 RI->eraseFromBundle();
290bool SIPreEmitPeephole::getBlockDestinations(
303class BranchWeightCostModel {
307 static constexpr uint64_t BranchNotTakenCost = 1;
314 :
TII(
TII), SchedModel(
TII.getSchedModel()) {
322 BranchTakenCost = SchedModel.computeInstrLatency(&Branch);
326 if (
TII.isWaitcnt(
MI.getOpcode()))
329 ThenCyclesCost += SchedModel.computeInstrLatency(&
MI);
341 return (Denominator - Numerator) * ThenCyclesCost <=
342 ((Denominator - Numerator) * BranchTakenCost +
343 Numerator * BranchNotTakenCost);
347bool SIPreEmitPeephole::mustRetainExeczBranch(
362 if (
MI.isConditionalBranch())
365 if (
MI.isUnconditionalBranch() &&
369 if (
MI.isMetaInstruction())
372 if (
TII->hasUnwantedEffectsWhenEXECEmpty(
MI))
375 if (!CostModel.isProfitable(
MI))
388 if (!
TII->getSchedModel().hasInstrSchedModel())
395 if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB,
Cond))
403 if (mustRetainExeczBranch(
MI, *FalseMBB, *TrueMBB))
407 MI.eraseFromParent();
415 TII =
ST.getInstrInfo();
416 TRI = &
TII->getRegisterInfo();
417 bool Changed =
false;
426 switch (
MI.getOpcode()) {
427 case AMDGPU::S_CBRANCH_VCCZ:
428 case AMDGPU::S_CBRANCH_VCCNZ:
429 Changed |= optimizeVccBranch(
MI);
431 case AMDGPU::S_CBRANCH_EXECZ:
432 Changed |= removeExeczBranch(
MI,
MBB);
437 if (!
ST.hasVGPRIndexMode())
441 const unsigned Threshold = 20;
449 if (Count == Threshold)
454 if (
MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON)
463 if (optimizeSetGPR(*SetGPRMI,
MI))
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isProfitable(const SmallVector< std::unique_ptr< StableFunctionMap::StableFunctionEntry > > &SFS)
static uint32_t getDenominator()
uint32_t getNumerator() const
static BranchProbability getZero()
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
BranchProbability getSuccProbability(const_succ_iterator Succ) const
Return probability of the edge from this block to MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
BasicBlockListType::const_iterator const_iterator
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void initializeSIPreEmitPeepholePass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
char & SIPreEmitPeepholeID
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ And
Bitwise or logical AND of integers.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.