26#define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
30class SIOptimizeExecMaskingPreRA {
62 return "SI optimize exec mask operations pre-RA";
75 "SI optimize exec mask operations pre-RA",
false,
false)
80char SIOptimizeExecMaskingPreRALegacy::
ID = 0;
85 return new SIOptimizeExecMaskingPreRALegacy();
106 for (MCRegUnit Unit :
TRI.regunits(
Reg.asMCReg())) {
132 unsigned Opc = MI.getOpcode();
133 return Opc == AMDGPU::S_CBRANCH_VCCZ ||
134 Opc == AMDGPU::S_CBRANCH_VCCNZ; });
139 TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *
I, *
MRI, LIS);
140 if (!
And ||
And->getOpcode() != LMC.
AndOpc || !
And->getOperand(1).isReg() ||
141 !
And->getOperand(2).isReg())
144 MachineOperand *AndCC = &
And->getOperand(1);
148 AndCC = &
And->getOperand(2);
151 }
else if (
And->getOperand(2).getReg() !=
Register(ExecReg)) {
155 auto *
Cmp =
TRI->findReachingDef(CmpReg, CmpSubReg, *
And, *
MRI, LIS);
156 if (!Cmp || !(
Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
157 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
158 Cmp->getParent() !=
And->getParent())
161 MachineOperand *Op1 =
TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
162 MachineOperand *Op2 =
TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
172 auto *Sel =
TRI->findReachingDef(SelReg, Op1->
getSubReg(), *Cmp, *
MRI, LIS);
173 if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
176 if (
TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
177 TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
180 Op1 =
TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
181 Op2 =
TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
182 MachineOperand *CC =
TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
199 [](
const VNInfo *VNI) {
200 return VNI->isPHIDef();
205 LLVM_DEBUG(
dbgs() <<
"Folding sequence:\n\t" << *Sel <<
'\t' << *Cmp <<
'\t'
208 MachineInstr *Andn2 =
210 And->getOperand(0).getReg())
215 MachineOperand &Andn2SCC = Andn2->
getOperand(3);
220 And->eraseFromParent();
243 [&](
const MachineInstr &
MI) {
244 return MI.readsRegister(CondReg, TRI);
250 Cmp->eraseFromParent();
257 if (
MRI->use_nodbg_empty(SelReg) && (IsKill ||
IsDead)) {
262 bool ShrinkSel = Sel->getOperand(0).readsReg();
263 Sel->eraseFromParent();
289bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &
MBB) {
295 MachineInstr &SaveExecMI = *
First;
300 return MI.getOpcode() == LMC.XorTermOpc;
305 MachineInstr &XorTermMI = *
I;
313 MachineInstr *AndExecMI =
nullptr;
315 while (
I !=
First && !AndExecMI) {
316 if (
I->getOpcode() == LMC.
AndOpc &&
I->getOperand(0).getReg() == DstReg &&
317 I->getOperand(1).getReg() ==
Register(ExecReg))
330 for (MCRegUnit Unit :
TRI->regunits(ExecReg)) {
332 if (RegUnit.
find(StartIdx) != std::prev(RegUnit.
find(EndIdx)))
354 SIOptimizeExecMaskingPreRA(MF, &LIS).
run(MF);
358bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
363 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
364 return SIOptimizeExecMaskingPreRA(MF, LIS).run(MF);
374 for (MachineBasicBlock &
MBB : MF) {
376 if (optimizeElseBranch(
MBB)) {
377 RecalcRegs.insert(AMDGPU::SCC);
381 if (optimizeVcndVcmpPair(
MBB)) {
382 RecalcRegs.insert(AMDGPU::VCC_LO);
383 RecalcRegs.insert(AMDGPU::VCC_HI);
384 RecalcRegs.insert(AMDGPU::SCC);
398 if (
Term.getOpcode() != AMDGPU::S_ENDPGM ||
Term.getNumOperands() != 1)
401 SmallVector<MachineBasicBlock*, 4> Blocks({&
MBB});
403 while (!Blocks.empty()) {
404 auto *CurBB = Blocks.pop_back_val();
405 auto I = CurBB->rbegin(),
E = CurBB->rend();
407 if (
I->isUnconditionalBranch() ||
I->getOpcode() == AMDGPU::S_ENDPGM)
409 else if (
I->isBranch())
414 if (
I->isDebugInstr()) {
419 if (
I->mayStore() ||
I->isBarrier() ||
I->isCall() ||
420 I->hasUnmodeledSideEffects() ||
I->hasOrderedMemoryRef())
424 <<
"Removing no effect instruction: " << *
I <<
'\n');
426 for (
auto &
Op :
I->operands()) {
428 RecalcRegs.insert(
Op.getReg());
431 auto Next = std::next(
I);
433 I->eraseFromParent();
443 for (
auto *Pred : CurBB->predecessors()) {
444 if (Pred->succ_size() == 1)
445 Blocks.push_back(Pred);
458 unsigned ScanThreshold = 10;
460 && ScanThreshold--; ++
I) {
462 if (!(
I->isFullCopy() &&
I->getOperand(1).getReg() ==
Register(ExecReg)))
465 Register SavedExec =
I->getOperand(0).getReg();
466 if (SavedExec.
isVirtual() &&
MRI->hasOneNonDBGUse(SavedExec)) {
467 MachineInstr *SingleExecUser = &*
MRI->use_instr_nodbg_begin(SavedExec);
473 static_cast<unsigned>(Idx) <
475 TII->isOperandLegal(*SingleExecUser, Idx, &
I->getOperand(1))) {
478 I->eraseFromParent();
479 MRI->replaceRegWith(SavedExec, ExecReg);
489 for (
auto Reg : RecalcRegs) {
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isDefBetween(Register Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx, SlotIndex SelIdx)
SI Optimize VGPR LiveRange
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned OrSaveExecOpc
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
Implements a dense probed hash-table based set.
FunctionPass class - This class is used to implement most global optimizations.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
LiveRange & getRegUnit(MCRegUnit Unit)
Return the live range for register unit Unit.
LLVM_ABI void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos)
Remove value number and related live segments of LI and its subranges that start at position Pos.
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
Result of a LiveRange query.
bool isDeadDef() const
Return true if this instruction has a dead def.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
bool isKill() const
Return true if the live-in value is killed by this instruction.
This class represents the liveness of a register, stack slot, etc.
iterator_range< vni_iterator > vnis()
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
reverse_iterator rbegin()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI int findRegisterUseOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
StringRef - Represent a constant reference to a string, i.e.
self_iterator getIterator()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
char & SIOptimizeExecMaskingPreRAID
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ And
Bitwise or logical AND of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createSIOptimizeExecMaskingPreRAPass()
constexpr RegState getUndefRegState(bool B)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.