35#define DEBUG_TYPE "gcn-vopd-utils"
46 if (IsVOPD3 && !ST.hasVOPD3())
48 if (!IsVOPD3 && (
TII.isVOP3(MIX) ||
TII.isVOP3(MIY)))
50 if (
TII.isDPP(MIX) ||
TII.isDPP(MIY))
58 for (
auto &
Literal : UniqueLiterals) {
62 UniqueLiterals.push_back(&
Op);
66 auto getVRegIdx = [&](
unsigned OpcodeIdx,
unsigned OperandIdx) {
69 if (Operand.
isReg() &&
TRI->isVectorRegister(MRI, Operand.
getReg()))
76 for (
auto CompIdx : VOPD::COMPONENTS) {
81 if (!
TRI->isVectorRegister(MRI, Src0.
getReg())) {
84 }
else if (!
TII.isInlineConstant(Src0)) {
90 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
94 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
95 addLiteral(
MI.getOperand(CompOprIdx));
97 if (
MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
98 UniqueScalarRegs.
insert(AMDGPU::VCC_LO);
101 for (
auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
105 if (
OpName == AMDGPU::OpName::src2) {
108 if (
MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
109 UniqueScalarRegs.
insert(Src->getReg());
113 if (!Src->isReg() || !
TRI->isVGPR(MRI, Src->getReg()))
117 for (
auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
118 AMDGPU::OpName::op_sel}) {
126 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
127 AMDGPU::OpName::src2_modifiers}) {
135 if (UniqueLiterals.
size() > 1)
137 if ((UniqueLiterals.
size() + UniqueScalarRegs.
size()) > 2)
142 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
143 MIX.
getOpcode() == AMDGPU::V_MOV_B32_e32 &&
144 MIY.
getOpcode() == AMDGPU::V_MOV_B32_e32;
146 if (InstInfo.hasInvalidOperand(getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR,
155 *
TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
161 *
TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
168 <<
"\n\tY: " << MIY <<
"\n");
174static std::optional<VOPDMatchInfo>
185 if (!(FirstCanBeVOPD.
X && SecondCanBeVOPD.
Y) &&
186 !(FirstCanBeVOPD.
Y && SecondCanBeVOPD.
X))
190 if (
TII.hasRAWDependency(FirstMI, SecondMI))
194 bool AllowSameVGPR = ST.hasGFX12Insts();
196 if (FirstCanBeVOPD.
X && SecondCanBeVOPD.
Y) {
201 if (FirstCanBeVOPD.
Y && SecondCanBeVOPD.
X) {
204 bool IsAntiDep =
TII.hasRAWDependency(SecondMI, FirstMI);
205 AllowSameVGPR &= !IsAntiDep;
206 if (IsAntiDep && !
TII.isVOPDAntidependencyAllowed(SecondMI))
243 auto checkCanBeVOPD = [&](
bool VOPD3) {
246 return CanBeVOPD.
Y || CanBeVOPD.
X;
248 return checkCanBeVOPD(
false) || (ST.hasVOPD3() && checkCanBeVOPD(
true));
251#ifdef EXPENSIVE_CHECKS
254 MII != FirstMI->
getParent()->instr_end(); ++MII) {
255 if (&*MII == &SecondMI)
259 }() &&
"Expected FirstMI to precede SecondMI");
272struct VOPDPairingMutation : ScheduleDAGMutation {
279 void apply(ScheduleDAGInstrs *DAG)
override {
280 const TargetInstrInfo &
TII = *DAG->
TII;
283 LLVM_DEBUG(
dbgs() <<
"Target does not support VOPDPairingMutation\n");
287 BitVector VOPDCapable(DAG->
SUnits.size());
290 for (
auto ISUI = DAG->
SUnits.begin(),
E = DAG->
SUnits.end(); ISUI !=
E;
292 const MachineInstr *IMI = ISUI->getInstr();
295 VOPDCapable[IIdx] =
true;
299 for (
auto ISUI = DAG->
SUnits.begin(),
E = DAG->
SUnits.end(); ISUI !=
E;
301 if (!VOPDCapable[IIdx])
303 const MachineInstr *IMI = ISUI->getInstr();
304 unsigned JIdx = IIdx + 1;
305 for (
auto JSUI = ISUI + 1; JSUI !=
E; ++JSUI, ++JIdx) {
306 if (!VOPDCapable[JIdx] || JSUI->isBoundaryNode())
308 const MachineInstr *JMI = JSUI->getInstr();
314 VOPDCapable[JIdx] =
false;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
static std::optional< VOPDMatchInfo > tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily, MachineInstr &FirstMI, MachineInstr &SecondMI, bool IsVOPD3)
Core pair-eligibility check for a single VOPD encoding variant (VOPD or VOPD3).
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Interface definition for SIInstrInfo.
This file defines the SmallVector class.
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GCNSubtarget & getSubtarget() const
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3, bool AllowSameVGPR)
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
std::optional< VOPDMatchInfo > tryMatchVOPDPair(const SIInstrInfo &TII, MachineInstr &FirstMI, MachineInstr &SecondMI)
Check whether FirstMI and SecondMI can be combined into a VOPD instruction.
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Describes a matched VOPD pair: which instruction is the X component and which is the Y component,...