38#define DEBUG_TYPE "amdgpu-preload-kern-arg-prolog"
50class AMDGPUPreloadKernArgProlog {
66 void createBackCompatBlock(
unsigned NumKernArgPreloadSGPRs);
71 unsigned NumKernArgPreloadSGPRs);
81 return "AMDGPU Preload Kernel Arguments Prolog";
89char AMDGPUPreloadKernArgPrologLegacy::ID = 0;
92 "AMDGPU Preload Kernel Arguments Prolog",
false,
false)
95 AMDGPUPreloadKernArgPrologLegacy::
ID;
98 return new AMDGPUPreloadKernArgPrologLegacy();
101bool AMDGPUPreloadKernArgPrologLegacy::runOnMachineFunction(
103 return AMDGPUPreloadKernArgProlog(MF).run();
106AMDGPUPreloadKernArgProlog::AMDGPUPreloadKernArgProlog(
MachineFunction &MF)
109 TRI(*
ST.getRegisterInfo()) {}
111bool AMDGPUPreloadKernArgProlog::run() {
112 if (!
ST.hasKernargPreload())
115 unsigned NumKernArgPreloadSGPRs = MFI.getNumKernargPreloadedSGPRs();
116 if (!NumKernArgPreloadSGPRs)
119 createBackCompatBlock(NumKernArgPreloadSGPRs);
123void AMDGPUPreloadKernArgProlog::createBackCompatBlock(
124 unsigned NumKernArgPreloadSGPRs) {
125 auto KernelEntryMBB = MF.
begin();
127 MF.
insert(KernelEntryMBB, BackCompatMBB);
129 assert(MFI.getUserSGPRInfo().hasKernargSegmentPtr() &&
130 "Kernel argument segment pointer register not set.");
131 Register KernArgSegmentPtr = MFI.getArgInfo().KernargSegmentPtr.getRegister();
132 BackCompatMBB->
addLiveIn(KernArgSegmentPtr);
135 addBackCompatLoads(BackCompatMBB, KernArgSegmentPtr, NumKernArgPreloadSGPRs);
146 .
addMBB(&*KernelEntryMBB);
159 unsigned NumKernArgPreloadSGPRs) {
160 static constexpr LoadConfig Configs[] = {
161 {8, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM},
162 {4, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM},
163 {2, &AMDGPU::SReg_64RegClass, AMDGPU::S_LOAD_DWORDX2_IMM}};
165 for (
const auto &
Config : Configs) {
166 if (NumKernArgPreloadSGPRs >=
Config.Size) {
167 Register LoadReg =
TRI.getMatchingSuperReg(KernArgPreloadSGPR,
168 AMDGPU::sub0,
Config.RegClass);
178 return LoadConfig{1, &AMDGPU::SReg_32RegClass, AMDGPU::S_LOAD_DWORD_IMM,
182void AMDGPUPreloadKernArgProlog::addBackCompatLoads(
184 unsigned NumKernArgPreloadSGPRs) {
185 Register KernArgPreloadSGPR = MFI.getArgInfo().FirstKernArgPreloadReg;
189 while (NumKernArgPreloadSGPRs > 0) {
194 .
addReg(KernArgSegmentPtr)
199 KernArgPreloadSGPR = KernArgPreloadSGPR.
asMCReg() +
Config.Size;
200 NumKernArgPreloadSGPRs -=
Config.Size;
207 if (!AMDGPUPreloadKernArgProlog(MF).
run())
static LoadConfig getLoadParameters(const TargetRegisterInfo &TRI, Register KernArgPreloadSGPR, unsigned NumKernArgPreloadSGPRs)
Find the largest possible load size that fits with SGPR alignment.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const uint32_t IV[8]
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
FunctionPass class - This class is used to implement most global optimizations.
void setAlignment(Align A)
Set alignment of the basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
self_iterator getIterator()
IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUPreloadKernArgPrologLegacyID
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAMDGPUPreloadKernArgPrologLegacyPass()
Instruction set architecture version.
This struct is a compact representation of a valid (non-zero power of two) alignment.