30#define DEBUG_TYPE "si-lower-sgpr-spills"
37 "amdgpu-num-vgprs-for-wwm-alloc",
38 cl::desc(
"Max num VGPRs for whole-wave register allocation."),
41class SILowerSGPRSpills {
57 : LIS(LIS), Indexes(Indexes), MDT(MDT) {}
62 void updateLaneVGPRDomInstr(
85 .
set(MachineFunctionProperties::Property::IsSSA)
86 .
set(MachineFunctionProperties::Property::NoVRegs);
92char SILowerSGPRSpillsLegacy::ID = 0;
95 "SI lower SGPR spill instructions",
false,
false)
125 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
131 const bool IsLiveIn =
MRI.isLiveIn(Reg);
138 Indexes->insertMachineInstrInMaps(Inst);
142 LIS->removeAllRegUnitsForPhysReg(Reg);
166 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
171 "loadRegFromStackSlot didn't insert any code!");
188void SILowerSGPRSpills::calculateSaveRestoreBlocks(
MachineFunction &MF) {
209 SaveBlocks.push_back(&MF.
front());
212 SaveBlocks.push_back(&
MBB);
214 RestoreBlocks.push_back(&
MBB);
228bool SILowerSGPRSpills::spillCalleeSavedRegs(
239 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
242 if (!
F.hasFnAttribute(Attribute::Naked)) {
247 std::vector<CalleeSavedInfo> CSI;
250 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
253 if (SavedRegs.
test(Reg)) {
255 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
257 TRI->getSpillAlign(*RC),
true);
259 CSI.emplace_back(Reg, JunkFI);
269 assert(SaveBlocks.size() == 1 &&
"shrink wrapping not fully implemented");
281void SILowerSGPRSpills::updateLaneVGPRDomInstr(
295 for (
auto &Spill : VGPRSpills) {
296 if (PrevLaneVGPR ==
Spill.VGPR)
299 PrevLaneVGPR =
Spill.VGPR;
301 if (
Spill.Lane == 0 &&
I == LaneVGPRDomInstr.
end()) {
303 LaneVGPRDomInstr[
Spill.VGPR] = InsertPt;
306 auto PrevInsertPt =
I->second;
313 if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
314 I->second = InsertPt;
321 DomMBB = MDT->findNearestCommonDominator(DomMBB,
MBB);
323 I->second = InsertPt;
324 else if (DomMBB != PrevInsertPt->getParent())
330void SILowerSGPRSpills::determineRegsForWWMAllocation(
MachineFunction &MF,
342 unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
346 auto [MaxNumVGPRs, MaxNumAGPRs] =
TRI->getMaxNumVectorRegs(MF);
350 for (
unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
351 (
I < NumRegs) && (Reg >= AMDGPU::VGPR0); --
Reg) {
352 if (!ReservedRegs.
test(Reg) &&
353 !
MRI.isPhysRegUsed(Reg,
true)) {
354 TRI->markSuperRegs(RegMask, Reg);
361 TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
363 "can't find enough VGPRs for wwm-regalloc");
367bool SILowerSGPRSpillsLegacy::runOnMachineFunction(
MachineFunction &MF) {
368 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
369 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() :
nullptr;
370 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
371 SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() :
nullptr;
373 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
374 return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
379 TII =
ST.getInstrInfo();
380 TRI = &
TII->getRegisterInfo();
382 assert(SaveBlocks.empty() && RestoreBlocks.empty());
386 calculateSaveRestoreBlocks(MF);
388 bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
396 RestoreBlocks.clear();
400 bool MadeChange =
false;
401 bool SpilledToVirtVGPRLanes =
false;
405 const bool HasSGPRSpillToVGPR =
TRI->spillSGPRToVGPR() &&
407 if (HasSGPRSpillToVGPR) {
422 if (!
TII->isSGPRSpill(
MI))
425 if (
MI.getOperand(0).isUndef()) {
428 MI.eraseFromParent();
432 int FI =
TII->getNamedOperand(
MI, AMDGPU::OpName::addr)->getIndex();
436 if (IsCalleeSaveSGPRSpill) {
449 bool Spilled =
TRI->eliminateSGPRToVGPRSpillFrameIndex(
450 MI, FI,
nullptr, Indexes, LIS,
true);
453 "failed to spill SGPR to physical VGPR lane when allocated");
458 bool Spilled =
TRI->eliminateSGPRToVGPRSpillFrameIndex(
459 MI, FI,
nullptr, Indexes, LIS);
462 "failed to spill SGPR to virtual VGPR lane when allocated");
464 updateLaneVGPRDomInstr(FI, &
MBB, MIS.
begin(), LaneVGPRDomInstr);
465 SpilledToVirtVGPRLanes =
true;
472 auto InsertPt = LaneVGPRDomInstr[
Reg];
495 determineRegsForWWMAllocation(MF, WwmRegMask);
498 NonWwmRegMask.flip().clearBitsNotInMask(
TRI->getAllVGPRRegMask());
511 if (
MI.isDebugValue()) {
512 uint32_t StackOperandIdx =
MI.isDebugValueList() ? 2 : 0;
513 if (
MI.getOperand(StackOperandIdx).isFI() &&
515 MI.getOperand(StackOperandIdx).getIndex()) &&
516 SpillFIs[
MI.getOperand(StackOperandIdx).getIndex()]) {
517 MI.getOperand(StackOperandIdx)
518 .ChangeToRegister(
Register(),
false );
534 if (SpilledToVirtVGPRLanes) {
540 if (UnusedLowSGPR &&
TRI->getHWRegIndex(UnusedLowSGPR) <
550 RestoreBlocks.clear();
562 SILowerSGPRSpills(LIS, Indexes, MDT).
run(MF);
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void updateLiveness(MachineFunction &MF, ArrayRef< CalleeSavedInfo > CSI)
static void insertCSRRestores(MachineBasicBlock &RestoreBlock, MutableArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert restore code for the callee-saved registers used in the function.
SI lower SGPR spill instructions
static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef< CalleeSavedInfo > CSI, SlotIndexes *Indexes, LiveIntervals *LIS)
Insert spill code for the callee-saved registers used in the function.
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool test(unsigned Idx) const
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
iterator find(const_arg_type_t< KeyT > Val)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
Wrapper class representing physical registers. Should be passed by value.
An RAII based helper class to modify MachineFunctionProperties when running pass.
void push_back(MachineInstr *MI)
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Analysis pass which computes a MachineDominatorTree.
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &)
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
MachineBasicBlock * getRestorePoint() const
void setCalleeSavedInfoValid(bool v)
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineBasicBlock * getSavePoint() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void setSGPRForEXECCopy(Register Reg)
void setFlag(Register Reg, uint8_t Flag)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
Register getSGPRForEXECCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void updateNonWWMRegMask(BitVector &RegMask)
bool hasSpilledSGPRs() const
ArrayRef< Register > getSGPRSpillVGPRs() const
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)
Removes machine instruction (bundle) MI from the mapping.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Information about stack frame layout on the target.
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto reverse(ContainerTy &&C)
char & SILowerSGPRSpillsLegacyID
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.