30#define DEBUG_TYPE "amdgpu-nsa-reassign"
33 "Number of NSA instructions with non-sequential address found");
35 "Number of NSA instructions changed to sequential");
38class GCNNSAReassignImpl {
41 : VRM(VM), LRM(LM), LIS(LS) {}
43 bool run(MachineFunction &MF);
54 const GCNSubtarget *ST;
56 const MachineRegisterInfo *MRI;
58 const SIRegisterInfo *TRI;
70 NSA_Status CheckNSA(
const MachineInstr &
MI,
bool Fast =
false)
const;
72 bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
73 unsigned StartReg)
const;
75 bool canAssign(
unsigned StartReg,
unsigned NumRegs)
const;
77 bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals)
const;
84 GCNNSAReassignLegacy() : MachineFunctionPass(ID) {}
86 bool runOnMachineFunction(MachineFunction &MF)
override;
88 StringRef getPassName()
const override {
return "GCN NSA Reassign"; };
90 void getAnalysisUsage(AnalysisUsage &AU)
const override {
109char GCNNSAReassignLegacy::
ID = 0;
113bool GCNNSAReassignImpl::tryAssignRegisters(
115 unsigned NumRegs = Intervals.size();
117 for (
unsigned N = 0;
N < NumRegs; ++
N)
118 if (VRM->hasPhys(Intervals[
N]->reg()))
119 LRM->unassign(*Intervals[
N]);
121 for (
unsigned N = 0;
N < NumRegs; ++
N)
125 for (
unsigned N = 0;
N < NumRegs; ++
N)
131bool GCNNSAReassignImpl::canAssign(
unsigned StartReg,
unsigned NumRegs)
const {
132 for (
unsigned N = 0;
N < NumRegs; ++
N) {
133 unsigned Reg = StartReg +
N;
134 if (!
MRI->isAllocatable(
Reg))
137 for (
unsigned I = 0; CSRegs[
I]; ++
I)
138 if (
TRI->isSubRegisterEq(
Reg, CSRegs[
I]) &&
146bool GCNNSAReassignImpl::scavengeRegs(
147 SmallVectorImpl<LiveInterval *> &Intervals)
const {
148 unsigned NumRegs = Intervals.
size();
150 if (NumRegs > MaxNumVGPRs)
152 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
154 for (
unsigned Reg = AMDGPU::VGPR0;
Reg <= MaxReg; ++
Reg) {
155 if (!canAssign(
Reg, NumRegs))
158 if (tryAssignRegisters(Intervals,
Reg))
165GCNNSAReassignImpl::NSA_Status
166GCNNSAReassignImpl::CheckNSA(
const MachineInstr &
MI,
bool Fast)
const {
169 return NSA_Status::NOT_NSA;
171 switch (
Info->MIMGEncoding) {
172 case AMDGPU::MIMGEncGfx10NSA:
173 case AMDGPU::MIMGEncGfx11NSA:
176 return NSA_Status::NOT_NSA;
180 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
182 unsigned VgprBase = 0;
184 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
185 const MachineOperand &
Op =
MI.getOperand(VAddr0Idx +
I);
188 return NSA_Status::FIXED;
194 return NSA_Status::FIXED;
205 if (
TRI->getRegSizeInBits(*
MRI->getRegClass(
Reg)) != 32 ||
Op.getSubReg())
206 return NSA_Status::FIXED;
214 return NSA_Status::FIXED;
216 const MachineInstr *
Def =
MRI->getUniqueVRegDef(
Reg);
218 if (Def &&
Def->isCopy() &&
Def->getOperand(1).getReg() == PhysReg)
219 return NSA_Status::FIXED;
221 for (
auto U :
MRI->use_nodbg_operands(
Reg)) {
223 return NSA_Status::FIXED;
224 const MachineInstr *UseInst =
U.getParent();
226 return NSA_Status::FIXED;
230 return NSA_Status::FIXED;
235 else if (VgprBase +
I != PhysReg)
239 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
242bool GCNNSAReassignImpl::run(MachineFunction &MF) {
250 const SIMachineFunctionInfo *MFI = MF.
getInfo<SIMachineFunctionInfo>();
252 MaxNumVGPRs = std::min(
255 CSRegs =
MRI->getCalleeSavedRegs();
257 using Candidate = std::pair<const MachineInstr*, bool>;
259 for (
const MachineBasicBlock &
MBB : MF) {
260 for (
const MachineInstr &
MI :
MBB) {
261 switch (CheckNSA(
MI)) {
264 case NSA_Status::CONTIGUOUS:
267 case NSA_Status::NON_CONTIGUOUS:
269 ++NumNSAInstructions;
276 for (
auto &
C : Candidates) {
280 const MachineInstr *
MI =
C.first;
281 if (CheckNSA(*
MI,
true) == NSA_Status::CONTIGUOUS) {
290 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::vaddr0);
294 SlotIndex MinInd, MaxInd;
295 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I) {
296 const MachineOperand &
Op =
MI->getOperand(VAddr0Idx +
I);
313 MinInd =
I != 0 ? std::min(MinInd, LI->
beginIndex()) : LI->beginIndex();
314 MaxInd =
I != 0 ? std::max(MaxInd, LI->
endIndex()) : LI->endIndex();
317 if (Intervals.
empty())
321 <<
"\tOriginal allocation:\t";
327 bool Success = scavengeRegs(Intervals);
330 if (VRM->
hasPhys(Intervals.back()->reg()))
335 std::lower_bound(Candidates.begin(), &
C, MinInd,
336 [
this](
const Candidate &
C, SlotIndex
I) {
337 return LIS->getInstructionIndex(*C.first) < I;
339 for (
auto *
E = Candidates.end();
342 if (
I->second && CheckNSA(*
I->first,
true) < NSA_Status::CONTIGUOUS) {
350 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
351 if (VRM->
hasPhys(Intervals[
I]->reg()))
354 for (
unsigned I = 0;
I <
Info->VAddrOperands; ++
I)
355 LRM->
assign(*Intervals[
I], OrigRegs[
I]);
363 dbgs() <<
"\tNew allocation:\t\t ["
374bool GCNNSAReassignLegacy::runOnMachineFunction(MachineFunction &MF) {
375 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
376 auto *LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
377 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
379 GCNNSAReassignImpl Impl(VRM, LRM, LIS);
390 GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
bool hasNonNSAEncoding() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
void unassign(const LiveInterval &VirtReg, bool ClearAllReferencingSegments=false)
Unassign VirtReg from its PhysReg.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
unsigned getOccupancy() const
unsigned getDynamicVGPRBlockSize() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
Register getPreSplitReg(Register virtReg) const
returns the live interval virtReg is split from.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
bool isAssignedReg(Register virtReg) const
returns true if the specified virtual register is not mapped to a stack slot or rematerialized.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.