LLVM 18.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11// subroutines useful in the creation of VOPD instructions
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
38 const MachineInstr &FirstMI,
39 const MachineInstr &SecondMI) {
40 namespace VOPD = AMDGPU::VOPD;
41
42 const MachineFunction *MF = FirstMI.getMF();
43 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
45 const MachineRegisterInfo &MRI = MF->getRegInfo();
46 // Literals also count against scalar bus limit
48 auto addLiteral = [&](const MachineOperand &Op) {
49 for (auto &Literal : UniqueLiterals) {
50 if (Literal->isIdenticalTo(Op))
51 return;
52 }
53 UniqueLiterals.push_back(&Op);
54 };
55 SmallVector<Register> UniqueScalarRegs;
56 assert([&]() -> bool {
57 for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
58 MII != FirstMI.getParent()->instr_end(); ++MII) {
59 if (&*MII == &SecondMI)
60 return true;
61 }
62 return false;
63 }() && "Expected FirstMI to precede SecondMI");
64 // Cannot pair dependent instructions
65 for (const auto &Use : SecondMI.uses())
66 if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
67 return false;
68
69 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
70 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
71 const MachineOperand &Operand = MI.getOperand(OperandIdx);
72 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
73 return Operand.getReg();
74 return Register();
75 };
76
77 auto InstInfo =
78 AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());
79
80 for (auto CompIdx : VOPD::COMPONENTS) {
81 const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
82
83 const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
84 if (Src0.isReg()) {
85 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
86 if (!is_contained(UniqueScalarRegs, Src0.getReg()))
87 UniqueScalarRegs.push_back(Src0.getReg());
88 }
89 } else {
90 if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
91 addLiteral(Src0);
92 }
93
94 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
95 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
96 addLiteral(MI.getOperand(CompOprIdx));
97 }
98 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
99 UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
100 }
101
102 if (UniqueLiterals.size() > 1)
103 return false;
104 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
105 return false;
106 if (InstInfo.hasInvalidOperand(getVRegIdx))
107 return false;
108
109 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
110 << "\n\tY: " << SecondMI << "\n");
111 return true;
112}
113
114/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
115/// together. Given SecondMI, when FirstMI is unspecified, then check if
116/// SecondMI may be part of a fused pair at all.
118 const TargetSubtargetInfo &TSI,
119 const MachineInstr *FirstMI,
120 const MachineInstr &SecondMI) {
121 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
122 unsigned Opc2 = SecondMI.getOpcode();
123 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
124
125 // One instruction case
126 if (!FirstMI)
127 return SecondCanBeVOPD.Y;
128
129 unsigned Opc = FirstMI->getOpcode();
130 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
131
132 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
133 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
134 return false;
135
136 return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
137}
138
139namespace {
140/// Adapts design from MacroFusion
141/// Puts valid candidate instructions back-to-back so they can easily
142/// be turned into VOPD instructions
143/// Greedily pairs instruction candidates. O(n^2) algorithm.
144struct VOPDPairingMutation : ScheduleDAGMutation {
145 ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
146
147 VOPDPairingMutation(
148 ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
150
151 void apply(ScheduleDAGInstrs *DAG) override {
152 const TargetInstrInfo &TII = *DAG->TII;
153 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
154 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
155 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
156 return;
157 }
158
159 std::vector<SUnit>::iterator ISUI, JSUI;
160 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
161 const MachineInstr *IMI = ISUI->getInstr();
162 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
163 continue;
164 if (!hasLessThanNumFused(*ISUI, 2))
165 continue;
166
167 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
168 if (JSUI->isBoundaryNode())
169 continue;
170 const MachineInstr *JMI = JSUI->getInstr();
171 if (!hasLessThanNumFused(*JSUI, 2) ||
172 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
173 continue;
174 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
175 break;
176 }
177 }
178 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
179 }
180};
181} // namespace
182
183std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
184 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
185}
unsigned const MachineRegisterInfo * MRI
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
AMD GCN specific subclass of TargetSubtarget.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This class represents an Operation in the Expression.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:543
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:326
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:707
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:540
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
A ScheduleDAG for scheduling lists of MachineInstr.
Mutate the DAG as a postpass after normal DAG building.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
CanBeVOPD getCanBeVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1303
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::function< bool(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)> ShouldSchedulePredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition: MacroFusion.h:35
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
Definition: MacroFusion.cpp:53
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1883
bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Definition: MacroFusion.cpp:46
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI)