LLVM 20.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
/// Check whether FirstMI (used as the X component) and SecondMI (used as the
/// Y component) satisfy the VOPD register and literal constraints.
///
/// Returns false when:
///   - SecondMI has a register use that FirstMI modifies,
///   - more than one distinct literal is needed by the two instructions,
///   - the number of literals plus recorded scalar registers exceeds two, or
///   - InstInfo.hasInvalidOperand() rejects the operand combination.
/// Returns true otherwise (and emits a debug message naming both instrs).
///
/// NOTE(review): the opening line of this definition (listing line 37) is
/// absent from this extract; the cross-reference index at the end of the page
/// gives the signature as `bool checkVOPDRegConstraints(const SIInstrInfo
/// &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI)`.
38 const MachineInstr &FirstMI,
39 const MachineInstr &SecondMI) {
40 namespace VOPD = AMDGPU::VOPD;
41
42 const MachineFunction *MF = FirstMI.getMF();
43 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
// TRI is dereferenced below without a null check.
// NOTE(review): presumably this cast cannot fail for a GCN subtarget — confirm.
44 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
45 const MachineRegisterInfo &MRI = MF->getRegInfo();
46 // Literals also count against scalar bus limit
// NOTE(review): UniqueLiterals is used below, but its declaration (listing
// line 47) is not present in this extract. From its use it holds pointers to
// the MachineOperands that act as literals.
// addLiteral records Op unless an identical operand was already recorded.
48 auto addLiteral = [&](const MachineOperand &Op) {
49 for (auto &Literal : UniqueLiterals) {
50 if (Literal->isIdenticalTo(Op))
51 return;
52 }
53 UniqueLiterals.push_back(&Op);
54 };
55 SmallVector<Register> UniqueScalarRegs;
// Sanity check (only when assertions are enabled): walk forward from FirstMI
// to the end of its parent block and require that SecondMI is encountered.
56 assert([&]() -> bool {
57 for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
58 MII != FirstMI.getParent()->instr_end(); ++MII) {
59 if (&*MII == &SecondMI)
60 return true;
61 }
62 return false;
63 }() && "Expected FirstMI to precede SecondMI");
64 // Cannot pair dependent instructions
65 for (const auto &Use : SecondMI.uses())
66 if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
67 return false;
68
// Callback handed to InstInfo.hasInvalidOperand(): for the X (FirstMI) or
// Y (SecondMI) instruction, return the register of operand OperandIdx when it
// is a vector register, otherwise a default-constructed Register.
69 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
70 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
71 const MachineOperand &Operand = MI.getOperand(OperandIdx);
72 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
73 return Operand.getReg();
74 return Register();
75 };
76
77 auto InstInfo =
78 AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());
79
// Collect, per component, what the pair needs besides vector registers.
80 for (auto CompIdx : VOPD::COMPONENTS) {
81 const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
82
// src0: a non-vector register is recorded once as a scalar register; a
// non-register operand that is not an inline constant is recorded as a literal.
83 const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
84 if (Src0.isReg()) {
85 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
86 if (!is_contained(UniqueScalarRegs, Src0.getReg()))
87 UniqueScalarRegs.push_back(Src0.getReg());
88 }
89 } else {
90 if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
91 addLiteral(Src0);
92 }
93
// A mandatory literal operand of this component is recorded as a literal too.
94 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
95 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
96 addLiteral(MI.getOperand(CompOprIdx));
97 }
// An implicit VCC use is recorded as VCC_LO. Unlike the src0 path there is no
// is_contained check here, so VCC_LO can be recorded more than once (for
// example once per component).
// NOTE(review): confirm that this double counting is intended.
98 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
99 UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
100 }
101
// At most one distinct literal, and literals plus recorded scalar registers
// together may not exceed two.
102 if (UniqueLiterals.size() > 1)
103 return false;
104 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
105 return false;
106
107 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
108 bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
109 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
110 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
111
// Final operand check is delegated to InstInfo, using getVRegIdx to look up
// the vector registers of each component.
112 if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
113 return false;
114
115 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
116 << "\n\tY: " << SecondMI << "\n");
117 return true;
118}
119
120/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
121/// together. Given SecondMI, when FirstMI is unspecified, then check if
122/// SecondMI may be part of a fused pair at all.
///
/// \param TII     Target instruction info; cast to SIInstrInfo below.
/// \param TSI     Subtarget info; not used in the visible body.
/// \param FirstMI Candidate first instruction, or null for the
///                one-instruction query.
/// \param SecondMI Candidate second instruction.
/// \returns With a null FirstMI, the Y flag of getCanBeVOPD() for SecondMI's
///          opcode. Otherwise false unless one instruction can be X and the
///          other Y, in which case the result of checkVOPDRegConstraints().
///
/// NOTE(review): the opening line of this definition (listing line 123) is
/// absent from this extract; the cross-reference index at the end of the page
/// gives the signature as `static bool shouldScheduleVOPDAdjacent(const
/// TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr
/// *FirstMI, const MachineInstr &SecondMI)`.
124 const TargetSubtargetInfo &TSI,
125 const MachineInstr *FirstMI,
126 const MachineInstr &SecondMI) {
127 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
128 unsigned Opc2 = SecondMI.getOpcode();
129 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
130
131 // One instruction case
132 if (!FirstMI)
133 return SecondCanBeVOPD.Y;
134
135 unsigned Opc = FirstMI->getOpcode();
136 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
137
// The opcodes must be able to fill the X and Y slots in one order or the other.
138 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
139 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
140 return false;
141
// Opcode compatibility is not sufficient; the operands must also fit.
142 return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
143}
144
145namespace {
146/// Adapts design from MacroFusion
147/// Puts valid candidate instructions back-to-back so they can easily
148/// be turned into VOPD instructions
149/// Greedily pairs instruction candidates. O(n^2) algorithm.
150struct VOPDPairingMutation : ScheduleDAGMutation {
// Predicate deciding whether two instructions should be paired; called with a
// null first instruction to ask whether a single instruction is a candidate.
151 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
152
// NOTE(review): the constructor's member-initializer/body line (listing line
// 155) is absent from this extract.
153 VOPDPairingMutation(
154 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
156
/// Walk the DAG's SUnits and, for each candidate, fuse it with the first
/// later SUnit that the predicate accepts. Does nothing unless the subtarget
/// has VOPD and is wave32.
157 void apply(ScheduleDAGInstrs *DAG) override {
158 const TargetInstrInfo &TII = *DAG->TII;
159 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
160 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
161 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
162 return;
163 }
164
165 std::vector<SUnit>::iterator ISUI, JSUI;
166 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
// NOTE(review): unlike the inner loop, there is no isBoundaryNode() check
// before dereferencing the instruction here — confirm SUnits never holds
// boundary nodes.
167 const MachineInstr *IMI = ISUI->getInstr();
// Skip instructions that cannot take part in a pair at all.
168 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
169 continue;
// Skip SUnits for which hasLessThanNumFused(..., 2) is false.
170 if (!hasLessThanNumFused(*ISUI, 2))
171 continue;
172
// Scan the SUnits after ISUI for a partner; stop at the first successful fuse.
173 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
174 if (JSUI->isBoundaryNode())
175 continue;
176 const MachineInstr *JMI = JSUI->getInstr();
177 if (!hasLessThanNumFused(*JSUI, 2) ||
178 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
179 continue;
180 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
181 break;
182 }
183 }
184 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
185 }
186};
187} // namespace
188
/// Create a VOPDPairingMutation that uses shouldScheduleVOPDAdjacent as its
/// pairing predicate, returned as a ScheduleDAGMutation.
189std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
190 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
191}
unsigned const MachineRegisterInfo * MRI
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
#define LLVM_DEBUG(...)
Definition: Debug.h:106
AMD GCN specific subclass of TargetSubtarget.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This class represents an Operation in the Expression.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
iterator_range< mop_iterator > uses()
Returns a range that includes all operands which may be register uses.
Definition: MachineInstr.h:739
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:572
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
A ScheduleDAG for scheduling lists of MachineInstr.
Mutate the DAG as a postpass after normal DAG building.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
CanBeVOPD getCanBeVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1309
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
Definition: MacroFusion.cpp:53
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition: MacroFusion.h:35
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Definition: MacroFusion.cpp:46
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI)