29#define DEBUG_TYPE "amdgpu-nsa-reassign"
32 "Number of NSA instructions with non-sequential address found");
34 "Number of NSA instructions changed to sequential");
59 using NSA_Status =
enum {
86 unsigned StartReg)
const;
88 bool canAssign(
unsigned StartReg,
unsigned NumRegs)
const;
104char GCNNSAReassign::
ID = 0;
110 unsigned StartReg)
const {
111 unsigned NumRegs = Intervals.size();
113 for (
unsigned N = 0;
N < NumRegs; ++
N)
114 if (VRM->hasPhys(Intervals[
N]->reg()))
115 LRM->unassign(*Intervals[
N]);
117 for (
unsigned N = 0;
N < NumRegs; ++
N)
121 for (
unsigned N = 0;
N < NumRegs; ++
N)
127bool GCNNSAReassign::canAssign(
unsigned StartReg,
unsigned NumRegs)
const {
128 for (
unsigned N = 0;
N < NumRegs; ++
N) {
129 unsigned Reg = StartReg +
N;
130 if (!
MRI->isAllocatable(Reg))
133 for (
unsigned I = 0; CSRegs[
I]; ++
I)
134 if (
TRI->isSubRegisterEq(Reg, CSRegs[
I]) &&
135 !LRM->isPhysRegUsed(CSRegs[
I]))
144 unsigned NumRegs = Intervals.
size();
146 if (NumRegs > MaxNumVGPRs)
148 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
150 for (
unsigned Reg = AMDGPU::VGPR0;
Reg <= MaxReg; ++
Reg) {
151 if (!canAssign(Reg, NumRegs))
154 if (tryAssignRegisters(Intervals, Reg))
161GCNNSAReassign::NSA_Status
165 return NSA_Status::NOT_NSA;
167 switch (
Info->MIMGEncoding) {
168 case AMDGPU::MIMGEncGfx10NSA:
169 case AMDGPU::MIMGEncGfx11NSA:
172 return NSA_Status::NOT_NSA;
178 unsigned VgprBase = 0;
180 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
183 if (
Reg.isPhysical() || !VRM->isAssignedReg(Reg))
184 return NSA_Status::FIXED;
186 Register PhysReg = VRM->getPhys(Reg);
190 return NSA_Status::FIXED;
201 if (
TRI->getRegSizeInBits(*
MRI->getRegClass(Reg)) != 32 ||
Op.getSubReg())
202 return NSA_Status::FIXED;
209 if (VRM->getPreSplitReg(Reg))
210 return NSA_Status::FIXED;
214 if (Def &&
Def->isCopy() &&
Def->getOperand(1).getReg() == PhysReg)
215 return NSA_Status::FIXED;
217 for (
auto U :
MRI->use_nodbg_operands(Reg)) {
219 return NSA_Status::FIXED;
222 return NSA_Status::FIXED;
225 if (!LIS->hasInterval(Reg))
226 return NSA_Status::FIXED;
231 else if (VgprBase +
I != PhysReg)
235 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
240 if (!
ST->hasNSAEncoding() || !
ST->hasNonNSAEncoding())
244 TRI =
ST->getRegisterInfo();
245 VRM = &getAnalysis<VirtRegMap>();
246 LRM = &getAnalysis<LiveRegMatrix>();
247 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
250 MaxNumVGPRs =
ST->getMaxNumVGPRs(MF);
251 MaxNumVGPRs = std::min(
ST->getMaxNumVGPRs(MFI->
getOccupancy()), MaxNumVGPRs);
252 CSRegs =
MRI->getCalleeSavedRegs();
254 using Candidate = std::pair<const MachineInstr*, bool>;
258 switch (CheckNSA(
MI)) {
261 case NSA_Status::CONTIGUOUS:
264 case NSA_Status::NON_CONTIGUOUS:
266 ++NumNSAInstructions;
272 bool Changed =
false;
273 for (
auto &
C : Candidates) {
278 if (CheckNSA(*
MI,
true) == NSA_Status::CONTIGUOUS) {
292 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
307 MinInd = MaxInd = LIS->getInstructionIndex(*
MI);
310 MinInd =
I != 0 ? std::min(MinInd, LI->
beginIndex()) : LI->beginIndex();
311 MaxInd =
I != 0 ? std::max(MaxInd, LI->
endIndex()) : LI->endIndex();
314 if (Intervals.
empty())
318 <<
"\tOriginal allocation:\t";
324 bool Success = scavengeRegs(Intervals);
327 if (VRM->hasPhys(Intervals.back()->reg()))
331 auto I = std::lower_bound(Candidates.begin(), &
C, MinInd,
333 return LIS->getInstructionIndex(*C.first) < I;
335 for (
auto E = Candidates.end();
Success &&
I != E &&
336 LIS->getInstructionIndex(*
I->first) < MaxInd; ++
I) {
337 if (
I->second && CheckNSA(*
I->first,
true) < NSA_Status::CONTIGUOUS) {
345 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
346 if (VRM->hasPhys(Intervals[
I]->reg()))
347 LRM->unassign(*Intervals[
I]);
349 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
350 LRM->assign(*Intervals[
I], OrigRegs[
I]);
358 dbgs() <<
"\tNew allocation:\t\t ["
unsigned const MachineRegisterInfo * MRI
Analysis containing CSE Info
AMD GCN specific subclass of TargetSubtarget.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an Operation in the Expression.
LiveInterval - This class represents the liveness of a register, or stack slot.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
SlotIndex - An opaque wrapper around machine indexes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
void initializeGCNNSAReassignPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.