LLVM 20.0.0git
SIPreAllocateWWMRegs.cpp
Go to the documentation of this file.
1//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Pass to pre-allocate WWM registers
11//
12//===----------------------------------------------------------------------===//
13
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
30
31static cl::opt<bool>
32 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
33 cl::init(false), cl::Hidden);
34
35namespace {
36
37class SIPreAllocateWWMRegs {
38private:
39 const SIInstrInfo *TII;
40 const SIRegisterInfo *TRI;
42 LiveIntervals *LIS;
44 VirtRegMap *VRM;
45 RegisterClassInfo RegClassInfo;
46
47 std::vector<unsigned> RegsToRewrite;
48#ifndef NDEBUG
49 void printWWMInfo(const MachineInstr &MI);
50#endif
51 bool processDef(MachineOperand &MO);
52 void rewriteRegs(MachineFunction &MF);
53
54public:
55 SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
56 VirtRegMap *VRM)
57 : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
58 bool run(MachineFunction &MF);
59};
60
61class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
62public:
63 static char ID;
64
65 SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
66
67 bool runOnMachineFunction(MachineFunction &MF) override;
68
69 void getAnalysisUsage(AnalysisUsage &AU) const override {
73 AU.setPreservesAll();
75 }
76};
77
78} // End anonymous namespace.
79
80INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
81 "SI Pre-allocate WWM Registers", false, false)
85INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
86 "SI Pre-allocate WWM Registers", false, false)
87
88char SIPreAllocateWWMRegsLegacy::ID = 0;
89
90char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
91
93 return new SIPreAllocateWWMRegsLegacy();
94}
95
96bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
97 Register Reg = MO.getReg();
98 if (Reg.isPhysical())
99 return false;
100
101 if (!TRI->isVGPR(*MRI, Reg))
102 return false;
103
104 if (VRM->hasPhys(Reg))
105 return false;
106
107 LiveInterval &LI = LIS->getInterval(Reg);
108
109 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
110 if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
111 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
112 Matrix->assign(LI, PhysReg);
113 assert(PhysReg != 0);
114 RegsToRewrite.push_back(Reg);
115 return true;
116 }
117 }
118
119 llvm_unreachable("physreg not found for WWM expression");
120}
121
122void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
123 for (MachineBasicBlock &MBB : MF) {
124 for (MachineInstr &MI : MBB) {
125 for (MachineOperand &MO : MI.operands()) {
126 if (!MO.isReg())
127 continue;
128
129 const Register VirtReg = MO.getReg();
130 if (VirtReg.isPhysical())
131 continue;
132
133 if (!VRM->hasPhys(VirtReg))
134 continue;
135
136 Register PhysReg = VRM->getPhys(VirtReg);
137 const unsigned SubReg = MO.getSubReg();
138 if (SubReg != 0) {
139 PhysReg = TRI->getSubReg(PhysReg, SubReg);
140 MO.setSubReg(0);
141 }
142
143 MO.setReg(PhysReg);
144 MO.setIsRenamable(false);
145 }
146 }
147 }
148
149 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
150
151 for (unsigned Reg : RegsToRewrite) {
152 LIS->removeInterval(Reg);
153
154 const Register PhysReg = VRM->getPhys(Reg);
155 assert(PhysReg != 0);
156
157 MFI->reserveWWMRegister(PhysReg);
158 }
159
160 RegsToRewrite.clear();
161
162 // Update the set of reserved registers to include WWM ones.
163 MRI->freezeReservedRegs();
164}
165
166#ifndef NDEBUG
168SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
169
170 unsigned Opc = MI.getOpcode();
171
172 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
173 dbgs() << "Entering ";
174 } else {
175 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
176 dbgs() << "Exiting ";
177 }
178
179 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
180 dbgs() << "Strict WWM ";
181 } else {
182 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
183 dbgs() << "Strict WQM ";
184 }
185
186 dbgs() << "region: " << MI;
187}
188
189#endif
190
191bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
192 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
193 auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
194 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
195 return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
196}
197
198bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
199 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
200
202
203 TII = ST.getInstrInfo();
204 TRI = &TII->getRegisterInfo();
205 MRI = &MF.getRegInfo();
206
207 RegClassInfo.runOnMachineFunction(MF);
208
209 bool PreallocateSGPRSpillVGPRs =
211 MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
212
213 bool RegsAssigned = false;
214
215 // We use a reverse post-order traversal of the control-flow graph to
216 // guarantee that we visit definitions in dominance order. Since WWM
217 // expressions are guaranteed to never involve phi nodes, and we can only
218 // escape WWM through the special WWM instruction, this means that this is a
219 // perfect elimination order, so we can never do any better.
221
222 for (MachineBasicBlock *MBB : RPOT) {
223 bool InWWM = false;
224 for (MachineInstr &MI : *MBB) {
225 if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
226 if (PreallocateSGPRSpillVGPRs)
227 RegsAssigned |= processDef(MI.getOperand(0));
228 continue;
229 }
230
231 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
232 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
233 LLVM_DEBUG(printWWMInfo(MI));
234 InWWM = true;
235 continue;
236 }
237
238 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
239 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
240 LLVM_DEBUG(printWWMInfo(MI));
241 InWWM = false;
242 }
243
244 if (!InWWM)
245 continue;
246
247 LLVM_DEBUG(dbgs() << "Processing " << MI);
248
249 for (MachineOperand &DefOpnd : MI.defs()) {
250 RegsAssigned |= processDef(DefOpnd);
251 }
252 }
253 }
254
255 if (!RegsAssigned)
256 return false;
257
258 rewriteRegs(MF);
259 return true;
260}
261
265 auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
266 auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
267 auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
268 SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
269 return PreservedAnalyses::all();
270}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_DEBUG(...)
Definition: Debug.h:106
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Live Register Matrix
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Pre allocate WWM Registers
#define DEBUG_TYPE
static cl::opt< bool > EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs", cl::init(false), cl::Hidden)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
@ IK_Free
No interference, go ahead and assign.
Definition: LiveRegMatrix.h:89
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setIsRenamable(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
VirtRegMap run(MachineFunction &MF, MachineFunctionAnalysisManager &MAM)
Definition: VirtRegMap.cpp:184
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createSIPreAllocateWWMRegsLegacyPass()
char & SIPreAllocateWWMRegsLegacyID