LLVM 19.0.0git
SIPreAllocateWWMRegs.cpp
Go to the documentation of this file.
1//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Pass to pre-allocate WWM registers
11//
12//===----------------------------------------------------------------------===//
13
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
30
31static cl::opt<bool>
32 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
33 cl::init(false), cl::Hidden);
34
35namespace {
36
37class SIPreAllocateWWMRegs : public MachineFunctionPass {
38private:
39 const SIInstrInfo *TII;
40 const SIRegisterInfo *TRI;
42 LiveIntervals *LIS;
44 VirtRegMap *VRM;
45 RegisterClassInfo RegClassInfo;
46
47 std::vector<unsigned> RegsToRewrite;
48#ifndef NDEBUG
49 void printWWMInfo(const MachineInstr &MI);
50#endif
51
52public:
53 static char ID;
54
55 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
57 }
58
59 bool runOnMachineFunction(MachineFunction &MF) override;
60
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
65 AU.setPreservesAll();
67 }
68
69private:
70 bool processDef(MachineOperand &MO);
71 void rewriteRegs(MachineFunction &MF);
72};
73
74} // End anonymous namespace.
75
76INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
77 "SI Pre-allocate WWM Registers", false, false)
81INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
82 "SI Pre-allocate WWM Registers", false, false)
83
84char SIPreAllocateWWMRegs::ID = 0;
85
86char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
87
89 return new SIPreAllocateWWMRegs();
90}
91
92bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
93 Register Reg = MO.getReg();
94 if (Reg.isPhysical())
95 return false;
96
97 if (!TRI->isVGPR(*MRI, Reg))
98 return false;
99
100 if (VRM->hasPhys(Reg))
101 return false;
102
103 LiveInterval &LI = LIS->getInterval(Reg);
104
105 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
106 if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
107 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
108 Matrix->assign(LI, PhysReg);
109 assert(PhysReg != 0);
110 RegsToRewrite.push_back(Reg);
111 return true;
112 }
113 }
114
115 llvm_unreachable("physreg not found for WWM expression");
116}
117
118void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
119 for (MachineBasicBlock &MBB : MF) {
120 for (MachineInstr &MI : MBB) {
121 for (MachineOperand &MO : MI.operands()) {
122 if (!MO.isReg())
123 continue;
124
125 const Register VirtReg = MO.getReg();
126 if (VirtReg.isPhysical())
127 continue;
128
129 if (!VRM->hasPhys(VirtReg))
130 continue;
131
132 Register PhysReg = VRM->getPhys(VirtReg);
133 const unsigned SubReg = MO.getSubReg();
134 if (SubReg != 0) {
135 PhysReg = TRI->getSubReg(PhysReg, SubReg);
136 MO.setSubReg(0);
137 }
138
139 MO.setReg(PhysReg);
140 MO.setIsRenamable(false);
141 }
142 }
143 }
144
145 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
146
147 for (unsigned Reg : RegsToRewrite) {
148 LIS->removeInterval(Reg);
149
150 const Register PhysReg = VRM->getPhys(Reg);
151 assert(PhysReg != 0);
152
153 MFI->reserveWWMRegister(PhysReg);
154 }
155
156 RegsToRewrite.clear();
157
158 // Update the set of reserved registers to include WWM ones.
159 MRI->freezeReservedRegs();
160}
161
162#ifndef NDEBUG
164SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
165
166 unsigned Opc = MI.getOpcode();
167
168 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM ||
169 Opc == AMDGPU::ENTER_PSEUDO_WM) {
170 dbgs() << "Entering ";
171 } else {
172 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM ||
173 Opc == AMDGPU::EXIT_PSEUDO_WM);
174 dbgs() << "Exiting ";
175 }
176
177 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
178 dbgs() << "Strict WWM ";
179 } else if (Opc == AMDGPU::ENTER_PSEUDO_WM || Opc == AMDGPU::EXIT_PSEUDO_WM) {
180 dbgs() << "Pseudo WWM/WQM ";
181 } else {
182 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
183 dbgs() << "Strict WQM ";
184 }
185
186 dbgs() << "region: " << MI;
187}
188
189#endif
190
191bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
192 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
193
195
196 TII = ST.getInstrInfo();
197 TRI = &TII->getRegisterInfo();
198 MRI = &MF.getRegInfo();
199
200 LIS = &getAnalysis<LiveIntervals>();
201 Matrix = &getAnalysis<LiveRegMatrix>();
202 VRM = &getAnalysis<VirtRegMap>();
203
204 RegClassInfo.runOnMachineFunction(MF);
205
206 bool PreallocateSGPRSpillVGPRs =
208 MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
209
210 bool RegsAssigned = false;
211
212 // We use a reverse post-order traversal of the control-flow graph to
213 // guarantee that we visit definitions in dominance order. Since WWM
214 // expressions are guaranteed to never involve phi nodes, and we can only
215 // escape WWM through the special WWM instruction, this means that this is a
216 // perfect elimination order, so we can never do any better.
218
219 for (MachineBasicBlock *MBB : RPOT) {
220 bool InWWM = false;
221 for (MachineInstr &MI : *MBB) {
222 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
223 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
224 RegsAssigned |= processDef(MI.getOperand(0));
225
226 if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
227 if (!PreallocateSGPRSpillVGPRs)
228 continue;
229 RegsAssigned |= processDef(MI.getOperand(0));
230 }
231
232 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
233 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM ||
234 MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) {
235 LLVM_DEBUG(printWWMInfo(MI));
236 InWWM = true;
237 continue;
238 }
239
240 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
241 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM ||
242 MI.getOpcode() == AMDGPU::EXIT_PSEUDO_WM) {
243 LLVM_DEBUG(printWWMInfo(MI));
244 InWWM = false;
245 }
246
247 if (!InWWM)
248 continue;
249
250 LLVM_DEBUG(dbgs() << "Processing " << MI);
251
252 for (MachineOperand &DefOpnd : MI.defs()) {
253 RegsAssigned |= processDef(DefOpnd);
254 }
255 }
256 }
257
258 if (!RegsAssigned)
259 return false;
260
261 rewriteRegs(MF);
262 return true;
263}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
Provides AMDGPU specific target descriptions.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
#define LLVM_DEBUG(X)
Definition: Debug.h:101
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Live Register Matrix
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Pre allocate WWM Registers
#define DEBUG_TYPE
static cl::opt< bool > EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs", cl::init(false), cl::Hidden)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
@ IK_Free
No interference, go ahead and assign.
Definition: LiveRegMatrix.h:85
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setIsRenamable(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
Definition: VirtRegMap.h:105
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
Definition: VirtRegMap.h:99
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char & SIPreAllocateWWMRegsID
FunctionPass * createSIPreAllocateWWMRegsPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)