LLVM 23.0.0git
GCNNSAReassign.cpp
Go to the documentation of this file.
1//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
11/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
12/// with sequential versions where possible.
13///
14//===----------------------------------------------------------------------===//
15
#include "GCNNSAReassign.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "amdgpu-nsa-reassign"
31
32STATISTIC(NumNSAInstructions,
33 "Number of NSA instructions with non-sequential address found");
34STATISTIC(NumNSAConverted,
35 "Number of NSA instructions changed to sequential");
36
37namespace {
38class GCNNSAReassignImpl {
39public:
40 GCNNSAReassignImpl(VirtRegMap *VM, LiveRegMatrix *LM, LiveIntervals *LS)
41 : VRM(VM), LRM(LM), LIS(LS) {}
42
43 bool run(MachineFunction &MF);
44
45private:
46 enum NSA_Status {
47 NOT_NSA, // Not an NSA instruction
48 FIXED, // NSA which we cannot modify
49 NON_CONTIGUOUS, // NSA with non-sequential address which we can try
50 // to optimize.
51 CONTIGUOUS // NSA with all sequential address registers
52 };
53
54 const GCNSubtarget *ST;
55
56 const MachineRegisterInfo *MRI;
57
58 const SIRegisterInfo *TRI;
59
60 VirtRegMap *VRM;
61
62 LiveRegMatrix *LRM;
63
64 LiveIntervals *LIS;
65
66 unsigned MaxNumVGPRs;
67
68 const MCPhysReg *CSRegs;
69
70 NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
71
72 bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
73 unsigned StartReg) const;
74
75 bool canAssign(unsigned StartReg, unsigned NumRegs) const;
76
77 bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
78};
79
80class GCNNSAReassignLegacy : public MachineFunctionPass {
81public:
82 static char ID;
83
84 GCNNSAReassignLegacy() : MachineFunctionPass(ID) {}
85
86 bool runOnMachineFunction(MachineFunction &MF) override;
87
88 StringRef getPassName() const override { return "GCN NSA Reassign"; };
89
90 void getAnalysisUsage(AnalysisUsage &AU) const override {
91 AU.addRequired<LiveIntervalsWrapperPass>();
92 AU.addRequired<VirtRegMapWrapperLegacy>();
93 AU.addRequired<LiveRegMatrixWrapperLegacy>();
94 AU.setPreservesAll();
96 }
97};
98
99} // End anonymous namespace.
100
101INITIALIZE_PASS_BEGIN(GCNNSAReassignLegacy, DEBUG_TYPE, "GCN NSA Reassign",
102 false, false)
106INITIALIZE_PASS_END(GCNNSAReassignLegacy, DEBUG_TYPE, "GCN NSA Reassign", false,
107 false)
108
109char GCNNSAReassignLegacy::ID = 0;
110
111char &llvm::GCNNSAReassignID = GCNNSAReassignLegacy::ID;
112
113bool GCNNSAReassignImpl::tryAssignRegisters(
114 SmallVectorImpl<LiveInterval *> &Intervals, unsigned StartReg) const {
115 unsigned NumRegs = Intervals.size();
116
117 for (unsigned N = 0; N < NumRegs; ++N)
118 if (VRM->hasPhys(Intervals[N]->reg()))
119 LRM->unassign(*Intervals[N]);
120
121 for (unsigned N = 0; N < NumRegs; ++N)
122 if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
123 return false;
124
125 for (unsigned N = 0; N < NumRegs; ++N)
126 LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));
127
128 return true;
129}
130
131bool GCNNSAReassignImpl::canAssign(unsigned StartReg, unsigned NumRegs) const {
132 for (unsigned N = 0; N < NumRegs; ++N) {
133 unsigned Reg = StartReg + N;
134 if (!MRI->isAllocatable(Reg))
135 return false;
136
137 for (unsigned I = 0; CSRegs[I]; ++I)
138 if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
139 !LRM->isPhysRegUsed(CSRegs[I]))
140 return false;
141 }
142
143 return true;
144}
145
146bool GCNNSAReassignImpl::scavengeRegs(
147 SmallVectorImpl<LiveInterval *> &Intervals) const {
148 unsigned NumRegs = Intervals.size();
149
150 if (NumRegs > MaxNumVGPRs)
151 return false;
152 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
153
154 for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
155 if (!canAssign(Reg, NumRegs))
156 continue;
157
158 if (tryAssignRegisters(Intervals, Reg))
159 return true;
160 }
161
162 return false;
163}
164
165GCNNSAReassignImpl::NSA_Status
166GCNNSAReassignImpl::CheckNSA(const MachineInstr &MI, bool Fast) const {
167 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
168 if (!Info)
169 return NSA_Status::NOT_NSA;
170
171 switch (Info->MIMGEncoding) {
172 case AMDGPU::MIMGEncGfx10NSA:
173 case AMDGPU::MIMGEncGfx11NSA:
174 break;
175 default:
176 return NSA_Status::NOT_NSA;
177 }
178
179 int VAddr0Idx =
180 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
181
182 unsigned VgprBase = 0;
183 bool NSA = false;
184 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
185 const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
186 Register Reg = Op.getReg();
187 if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
188 return NSA_Status::FIXED;
189
190 Register PhysReg = VRM->getPhys(Reg);
191
192 if (!Fast) {
193 if (!PhysReg)
194 return NSA_Status::FIXED;
195
196 // TODO: address the below limitation to handle GFX11 BVH instructions
197 // Bail if address is not a VGPR32. That should be possible to extend the
198 // optimization to work with subregs of a wider register tuples, but the
199 // logic to find free registers will be much more complicated with much
200 // less chances for success. That seems reasonable to assume that in most
201 // cases a tuple is used because a vector variable contains different
202 // parts of an address and it is either already consecutive or cannot
203 // be reassigned if not. If needed it is better to rely on register
204 // coalescer to process such address tuples.
205 if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())
206 return NSA_Status::FIXED;
207
208 // InlineSpiller does not call LRM::assign() after an LI split leaving
209 // it in an inconsistent state, so we cannot call LRM::unassign().
210 // See llvm bug #48911.
211 // Skip reassign if a register has originated from such split.
212 // FIXME: Remove the workaround when bug #48911 is fixed.
213 if (VRM->getPreSplitReg(Reg))
214 return NSA_Status::FIXED;
215
216 const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
217
218 if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
219 return NSA_Status::FIXED;
220
221 for (auto U : MRI->use_nodbg_operands(Reg)) {
222 if (U.isImplicit())
223 return NSA_Status::FIXED;
224 const MachineInstr *UseInst = U.getParent();
225 if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
226 return NSA_Status::FIXED;
227 }
228
229 if (!LIS->hasInterval(Reg))
230 return NSA_Status::FIXED;
231 }
232
233 if (I == 0)
234 VgprBase = PhysReg;
235 else if (VgprBase + I != PhysReg)
236 NSA = true;
237 }
238
239 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
240}
241
242bool GCNNSAReassignImpl::run(MachineFunction &MF) {
243 ST = &MF.getSubtarget<GCNSubtarget>();
244 if (!ST->hasNSAEncoding() || !ST->hasNonNSAEncoding())
245 return false;
246
247 MRI = &MF.getRegInfo();
248 TRI = ST->getRegisterInfo();
249
250 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
251 MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
252 MaxNumVGPRs = std::min(
254 MaxNumVGPRs);
255 CSRegs = MRI->getCalleeSavedRegs();
256
257 using Candidate = std::pair<const MachineInstr*, bool>;
259 for (const MachineBasicBlock &MBB : MF) {
260 for (const MachineInstr &MI : MBB) {
261 switch (CheckNSA(MI)) {
262 default:
263 continue;
264 case NSA_Status::CONTIGUOUS:
265 Candidates.push_back(std::pair(&MI, true));
266 break;
267 case NSA_Status::NON_CONTIGUOUS:
268 Candidates.push_back(std::pair(&MI, false));
269 ++NumNSAInstructions;
270 break;
271 }
272 }
273 }
274
275 bool Changed = false;
276 for (auto &C : Candidates) {
277 if (C.second)
278 continue;
279
280 const MachineInstr *MI = C.first;
281 if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
282 // Already happen to be fixed.
283 C.second = true;
284 ++NumNSAConverted;
285 continue;
286 }
287
288 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
289 int VAddr0Idx =
290 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
291
294 SlotIndex MinInd, MaxInd;
295 for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
296 const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
297 Register Reg = Op.getReg();
298 LiveInterval *LI = &LIS->getInterval(Reg);
299 if (llvm::is_contained(Intervals, LI)) {
300 // Same register used, unable to make sequential
301 Intervals.clear();
302 break;
303 }
304 Intervals.push_back(LI);
305 OrigRegs.push_back(VRM->getPhys(Reg));
306 if (LI->empty()) {
307 // The address input is undef, so it doesn't contribute to the relevant
308 // range. Seed a reasonable index range if required.
309 if (I == 0)
310 MinInd = MaxInd = LIS->getInstructionIndex(*MI);
311 continue;
312 }
313 MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
314 MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
315 }
316
317 if (Intervals.empty())
318 continue;
319
320 LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
321 << "\tOriginal allocation:\t";
322 for (auto *LI
323 : Intervals) dbgs()
324 << " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
325 dbgs() << '\n');
326
327 bool Success = scavengeRegs(Intervals);
328 if (!Success) {
329 LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
330 if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
331 continue;
332 } else {
333 // Check we did not make it worse for other instructions.
334 auto *I =
335 std::lower_bound(Candidates.begin(), &C, MinInd,
336 [this](const Candidate &C, SlotIndex I) {
337 return LIS->getInstructionIndex(*C.first) < I;
338 });
339 for (auto *E = Candidates.end();
340 Success && I != E && LIS->getInstructionIndex(*I->first) < MaxInd;
341 ++I) {
342 if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
343 Success = false;
344 LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
345 }
346 }
347 }
348
349 if (!Success) {
350 for (unsigned I = 0; I < Info->VAddrOperands; ++I)
351 if (VRM->hasPhys(Intervals[I]->reg()))
352 LRM->unassign(*Intervals[I]);
353
354 for (unsigned I = 0; I < Info->VAddrOperands; ++I)
355 LRM->assign(*Intervals[I], OrigRegs[I]);
356
357 continue;
358 }
359
360 C.second = true;
361 ++NumNSAConverted;
363 dbgs() << "\tNew allocation:\t\t ["
364 << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
365 << " : "
366 << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
367 << "]\n");
368 Changed = true;
369 }
370
371 return Changed;
372}
373
374bool GCNNSAReassignLegacy::runOnMachineFunction(MachineFunction &MF) {
375 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
376 auto *LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
377 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
378
379 GCNNSAReassignImpl Impl(VRM, LRM, LIS);
380 return Impl.run(MF);
381}
382
383PreservedAnalyses
386 auto &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
387 auto &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
388 auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
389
390 GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);
391 Impl.run(MF);
392 return PreservedAnalyses::all();
393}
unsigned const MachineRegisterInfo * MRI
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
bool hasNonNSAEncoding() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
LiveInterval - This class represents the liveness of a register, or stack slot.
Register reg() const
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
bool empty() const
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
void unassign(const LiveInterval &VirtReg, bool ClearAllReferencingSegments=false)
Unassign VirtReg from its PhysReg.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition MCRegister.h:77
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool isCopy() const
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
Register getPreSplitReg(Register virtReg) const
returns the live interval virtReg is split from.
Definition VirtRegMap.h:147
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
Definition VirtRegMap.h:91
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
Definition VirtRegMap.h:87
bool isAssignedReg(Register virtReg) const
returns true if the specified virtual register is not mapped to a stack slot or rematerialized.
Definition VirtRegMap.h:162
Changed
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
char & GCNNSAReassignID
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
#define N