LLVM 22.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
38 const MachineInstr &FirstMI,
39 const MachineInstr &SecondMI, bool IsVOPD3) {
40 namespace VOPD = AMDGPU::VOPD;
41
42 const MachineFunction *MF = FirstMI.getMF();
43 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44
45 if (IsVOPD3 && !ST.hasVOPD3())
46 return false;
47 if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI)))
48 return false;
49 if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI))
50 return false;
51
52 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
53 const MachineRegisterInfo &MRI = MF->getRegInfo();
54 // Literals also count against scalar bus limit
56 auto addLiteral = [&](const MachineOperand &Op) {
57 for (auto &Literal : UniqueLiterals) {
58 if (Literal->isIdenticalTo(Op))
59 return;
60 }
61 UniqueLiterals.push_back(&Op);
62 };
63 SmallVector<Register> UniqueScalarRegs;
64 assert([&]() -> bool {
65 for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
66 MII != FirstMI.getParent()->instr_end(); ++MII) {
67 if (&*MII == &SecondMI)
68 return true;
69 }
70 return false;
71 }() && "Expected FirstMI to precede SecondMI");
72 // Cannot pair dependent instructions
73 for (const auto &Use : SecondMI.uses())
74 if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
75 return false;
76
77 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
78 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
79 const MachineOperand &Operand = MI.getOperand(OperandIdx);
80 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
81 return Operand.getReg();
82 return Register();
83 };
84
85 auto InstInfo =
86 AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());
87
88 for (auto CompIdx : VOPD::COMPONENTS) {
89 const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
90
91 const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
92 if (Src0.isReg()) {
93 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
94 if (!is_contained(UniqueScalarRegs, Src0.getReg()))
95 UniqueScalarRegs.push_back(Src0.getReg());
96 }
97 } else if (!TII.isInlineConstant(Src0)) {
98 if (IsVOPD3)
99 return false;
100 addLiteral(Src0);
101 }
102
103 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
104 if (IsVOPD3)
105 return false;
106
107 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
108 addLiteral(MI.getOperand(CompOprIdx));
109 }
110 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
111 UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
112
113 if (IsVOPD3) {
114 for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
115 const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
116 if (!Src)
117 continue;
118 if (OpName == AMDGPU::OpName::src2) {
119 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
120 continue;
121 if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
122 UniqueScalarRegs.push_back(Src->getReg());
123 continue;
124 }
125 }
126 if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
127 return false;
128 }
129
130 for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
131 AMDGPU::OpName::op_sel}) {
132 if (TII.hasModifiersSet(MI, OpName))
133 return false;
134 }
135
136 // Neg is allowed, other modifiers are not. NB: even though sext has the
137 // same value as neg, there are no combinable instructions with sext.
138 for (auto OpName :
139 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
140 AMDGPU::OpName::src2_modifiers}) {
141 const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
142 if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
143 return false;
144 }
145 }
146 }
147
148 if (UniqueLiterals.size() > 1)
149 return false;
150 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
151 return false;
152
153 // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
154 // source-cache.
155 bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
156 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
157 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
158 bool AllowSameVGPR = ST.hasGFX1250Insts();
159
160 if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
161 IsVOPD3))
162 return false;
163
164 if (IsVOPD3) {
165 // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
166 if (AMDGPU::hasNamedOperand(SecondMI.getOpcode(), AMDGPU::OpName::bitop3)) {
167 const MachineOperand &Src2 =
168 *TII.getNamedOperand(SecondMI, AMDGPU::OpName::src2);
169 if (!Src2.isImm() || Src2.getImm())
170 return false;
171 }
172 if (AMDGPU::hasNamedOperand(FirstMI.getOpcode(), AMDGPU::OpName::bitop3)) {
173 const MachineOperand &Src2 =
174 *TII.getNamedOperand(FirstMI, AMDGPU::OpName::src2);
175 if (!Src2.isImm() || Src2.getImm())
176 return false;
177 }
178 }
179
180 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
181 << "\n\tY: " << SecondMI << "\n");
182 return true;
183}
184
185/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
186/// together. Given SecondMI, when FirstMI is unspecified, then check if
187/// SecondMI may be part of a fused pair at all.
189 const TargetSubtargetInfo &TSI,
190 const MachineInstr *FirstMI,
191 const MachineInstr &SecondMI) {
192 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
193 const GCNSubtarget &ST = STII.getSubtarget();
194 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
195 unsigned Opc2 = SecondMI.getOpcode();
196
197 const auto checkVOPD = [&](bool VOPD3) -> bool {
198 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
199
200 // One instruction case
201 if (!FirstMI)
202 return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;
203
204 unsigned Opc = FirstMI->getOpcode();
205 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
206
207 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
208 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
209 return false;
210
211 return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
212 };
213
214 return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
215}
216
217namespace {
218/// Adapts design from MacroFusion
219/// Puts valid candidate instructions back-to-back so they can easily
220/// be turned into VOPD instructions
221/// Greedily pairs instruction candidates. O(n^2) algorithm.
222struct VOPDPairingMutation : ScheduleDAGMutation {
223 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
224
225 VOPDPairingMutation(
226 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
228
229 void apply(ScheduleDAGInstrs *DAG) override {
230 const TargetInstrInfo &TII = *DAG->TII;
231 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
232 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
233 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
234 return;
235 }
236
237 std::vector<SUnit>::iterator ISUI, JSUI;
238 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
239 const MachineInstr *IMI = ISUI->getInstr();
240 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
241 continue;
242 if (!hasLessThanNumFused(*ISUI, 2))
243 continue;
244
245 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
246 if (JSUI->isBoundaryNode())
247 continue;
248 const MachineInstr *JMI = JSUI->getInstr();
249 if (!hasLessThanNumFused(*JSUI, 2) ||
250 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
251 continue;
252 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
253 break;
254 }
255 }
256 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
257 }
258};
259} // namespace
260
261std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
262 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
263}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
const TargetInstrInfo & TII
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
AMD GCN specific subclass of TargetSubtarget.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GCNSubtarget & getSubtarget() const
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3)
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition MacroFusion.h:33
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.