LLVM 23.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
38 const MachineInstr &MIX,
39 const MachineInstr &MIY, bool IsVOPD3) {
   // Returns true when MIX and MIY pass every operand check below for being
   // used as the X and Y components of one VOPD instruction (or of a VOPD3
   // instruction when IsVOPD3 is set); returns false at the first failed check.
40 namespace VOPD = AMDGPU::VOPD;
41
42 const MachineFunction *MF = MIX.getMF();
43 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44
   // Early rejections: VOPD3 requested on a subtarget without it, a VOP3
   // instruction offered for the non-VOPD3 form, or either instruction being
   // DPP.
45 if (IsVOPD3 && !ST.hasVOPD3())
46 return false;
47 if (!IsVOPD3 && (TII.isVOP3(MIX) || TII.isVOP3(MIY)))
48 return false;
49 if (TII.isDPP(MIX) || TII.isDPP(MIY))
50 return false;
51
   // NOTE(review): the dyn_cast result is dereferenced below (for example
   // TRI->isVectorRegister) without a null check. If getRegisterInfo()
   // already returns a SIRegisterInfo pointer the cast is redundant - confirm.
52 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
53 const MachineRegisterInfo &MRI = MF->getRegInfo();
54 // Literals also count against scalar bus limit
   // addLiteral records Op in UniqueLiterals unless an identical operand has
   // already been recorded. (The declaration of UniqueLiterals is on a line
   // that is absent from this listing.)
56 auto addLiteral = [&](const MachineOperand &Op) {
57 for (auto &Literal : UniqueLiterals) {
58 if (Literal->isIdenticalTo(Op))
59 return;
60 }
61 UniqueLiterals.push_back(&Op);
62 };
   // Distinct non-vector registers read by the pair; checked against the
   // limit after the component loop.
63 SmallSet<Register, 4> UniqueScalarRegs;
64
65 // MIX must not modify any registers used by MIY.
66 for (const auto &Use : MIY.uses())
67 if (Use.isReg() && MIX.modifiesRegister(Use.getReg(), TRI))
68 return false;
69
   // Callback handed to InstInfo.hasInvalidOperand: selects MIX or MIY by
   // OpcodeIdx and returns the operand's register if it is a vector register,
   // otherwise a default-constructed Register.
70 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
71 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? MIX : MIY;
72 const MachineOperand &Operand = MI.getOperand(OperandIdx);
73 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
74 return Operand.getReg();
75 return Register();
76 };
77
78 auto InstInfo = AMDGPU::getVOPDInstInfo(MIX.getDesc(), MIY.getDesc());
79
   // Walk both components, collecting the literals and scalar registers each
   // one reads and applying the VOPD3-only operand restrictions.
80 for (auto CompIdx : VOPD::COMPONENTS) {
81 const MachineInstr &MI = (CompIdx == VOPD::X) ? MIX : MIY;
82
   // NOTE(review): getNamedOperand's result is dereferenced unchecked, so
   // this assumes every candidate instruction has a src0 operand - confirm.
83 const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
84 if (Src0.isReg()) {
85 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
86 UniqueScalarRegs.insert(Src0.getReg());
87 }
88 } else if (!TII.isInlineConstant(Src0)) {
   // A non-register src0 that is not an inline constant is a literal;
   // literals are rejected outright for VOPD3.
89 if (IsVOPD3)
90 return false;
91 addLiteral(Src0);
92 }
93
   // A mandatory literal operand is treated the same way as a src0 literal.
94 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
95 if (IsVOPD3)
96 return false;
97
98 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
99 addLiteral(MI.getOperand(CompOprIdx));
100 }
   // An implicit VCC use is recorded as VCC_LO in the scalar register set.
101 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
102 UniqueScalarRegs.insert(AMDGPU::VCC_LO);
103
104 if (IsVOPD3) {
   // src1/src2, when present, must be VGPR registers. Two src2 cases are
   // exempt: instructions with a bitop3 operand (their src2 is checked
   // after the loop) and V_CNDMASK_B32_e64, whose src2 register is counted
   // as a scalar register instead.
105 for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
106 const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
107 if (!Src)
108 continue;
109 if (OpName == AMDGPU::OpName::src2) {
110 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
111 continue;
112 if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
113 UniqueScalarRegs.insert(Src->getReg());
114 continue;
115 }
116 }
117 if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
118 return false;
119 }
120
   // Any clamp, omod or op_sel modifier being set rejects the pair.
121 for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
122 AMDGPU::OpName::op_sel}) {
123 if (TII.hasModifiersSet(MI, OpName))
124 return false;
125 }
126
127 // Neg is allowed, other modifiers are not. NB: even though sext has the
128 // same value as neg, there are no combinable instructions with sext.
129 for (auto OpName :
130 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
131 AMDGPU::OpName::src2_modifiers}) {
132 const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
133 if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
134 return false;
135 }
136 }
137 }
138
   // Limits over both components: at most one unique literal, and at most two
   // literals plus scalar registers combined.
139 if (UniqueLiterals.size() > 1)
140 return false;
141 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
142 return false;
143
144 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
145 // source-cache.
146 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
147 MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
148 MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
149 bool AllowSameVGPR = ST.hasGFX1250Insts();
150
   // SkipSrc and AllowSameVGPR are forwarded unchanged; the vector-register
   // operand validation itself is done by InstInfo.hasInvalidOperand.
151 if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
152 IsVOPD3))
153 return false;
154
155 if (IsVOPD3) {
156 // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
157 // MIX check is only relevant to scheduling?
158 if (AMDGPU::hasNamedOperand(MIX.getOpcode(), AMDGPU::OpName::bitop3)) {
159 const MachineOperand &Src2 =
160 *TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
161 if (!Src2.isImm() || Src2.getImm())
162 return false;
163 }
164 if (AMDGPU::hasNamedOperand(MIY.getOpcode(), AMDGPU::OpName::bitop3)) {
165 const MachineOperand &Src2 =
166 *TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
167 if (!Src2.isImm() || Src2.getImm())
168 return false;
169 }
170 }
171
172 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << MIX
173 << "\n\tY: " << MIY << "\n");
174 return true;
175}
176
177/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
178/// together. Given SecondMI, when FirstMI is unspecified, then check if
179/// SecondMI may be part of a fused pair at all.
181 const TargetSubtargetInfo &TSI,
182 const MachineInstr *FirstMI,
183 const MachineInstr &SecondMI) {
   // The subtarget is taken from the SIInstrInfo; the TSI parameter is not
   // read anywhere in this body.
184 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
185 const GCNSubtarget &ST = STII.getSubtarget();
186 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
187 unsigned Opc2 = SecondMI.getOpcode();
188
   // Runs the pairing test for one encoding: plain VOPD when VOPD3 is false,
   // VOPD3 when it is true.
189 const auto checkVOPD = [&](bool VOPD3) -> bool {
190 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
191
192 // One instruction case
193 if (!FirstMI)
194 return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;
195
196 unsigned Opc = FirstMI->getOpcode();
197 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
198
   // The opcodes must allow one instruction as X and the other as Y, in
   // either assignment.
199 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
200 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
201 return false;
202
   // Under EXPENSIVE_CHECKS, scan from FirstMI to the end of its parent
   // block and assert that SecondMI is found along the way.
203#ifdef EXPENSIVE_CHECKS
204 assert([&]() -> bool {
205 for (auto MII = MachineBasicBlock::const_iterator(FirstMI);
206 MII != FirstMI->getParent()->instr_end(); ++MII) {
207 if (&*MII == &SecondMI)
208 return true;
209 }
210 return false;
211 }() && "Expected FirstMI to precede SecondMI");
212#endif
213
   // NOTE(review): the opcode test above accepts either X/Y assignment, but
   // the register check is always called with FirstMI in the X slot and
   // SecondMI in the Y slot - confirm this is intended.
214 return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
215 };
216
   // Try the plain VOPD encoding first; fall back to VOPD3 only when the
   // subtarget reports support for it.
217 return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
218}
219
220namespace {
221/// Adapts design from MacroFusion
222/// Puts valid candidate instructions back-to-back so they can easily
223/// be turned into VOPD instructions
224/// Greedily pairs instruction candidates. O(n^2) algorithm.
225struct VOPDPairingMutation : ScheduleDAGMutation {
   // Predicate consulted by apply() to decide whether an instruction, or a
   // pair of instructions, is a pairing candidate.
226 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
227
   // Constructor taking the pairing predicate. Its initializer/body line is
   // absent from this listing.
228 VOPDPairingMutation(
229 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
231
   // Pairs candidate SUnits in DAG via fuseInstructionPair. Does nothing
   // unless the subtarget has VOPD and is wave32.
232 void apply(ScheduleDAGInstrs *DAG) override {
233 const TargetInstrInfo &TII = *DAG->TII;
234 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
235 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
236 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
237 return;
238 }
239
240 std::vector<SUnit>::iterator ISUI, JSUI;
241 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
242 const MachineInstr *IMI = ISUI->getInstr();
   // Skip SUnits whose instruction cannot be part of any pair (predicate
   // called with a null FirstMI) or that fail the hasLessThanNumFused
   // check with a limit of 2.
243 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
244 continue;
245 if (!hasLessThanNumFused(*ISUI, 2))
246 continue;
247
   // Search the later SUnits for a partner and stop at the first one that
   // fuseInstructionPair accepts.
248 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
249 if (JSUI->isBoundaryNode())
250 continue;
251 const MachineInstr *JMI = JSUI->getInstr();
252 if (!hasLessThanNumFused(*JSUI, 2) ||
253 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
254 continue;
255 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
256 break;
257 }
258 }
259 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
260 }
261};
262} // namespace
263
/// Creates a VOPDPairingMutation that uses shouldScheduleVOPDAdjacent as its
/// pairing predicate and returns it as an owning ScheduleDAGMutation pointer.
264std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
265 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
266}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
AMD GCN specific subclass of TargetSubtarget.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GCNSubtarget & getSubtarget() const
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3)
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition MacroFusion.h:33
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.