24AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr(VariadicKind Kind,
28 assert(
Args.size() >= 1 &&
"Needs a minimum of one expression.");
29 assert(Kind != AGVK_None &&
30 "Cannot construct AMDGPUVariadicMCExpr of kind none.");
37 RawArgs =
static_cast<const MCExpr **
>(
39 std::uninitialized_copy(
Args.begin(),
Args.end(), RawArgs);
43AMDGPUVariadicMCExpr::~AMDGPUVariadicMCExpr() { Ctx.
deallocate(RawArgs); }
53 "Indexing out of bounds AMDGPUVariadicMCExpr sub-expr");
72 OS <<
"totalnumvgprs(";
81 for (
auto It = Args.begin(); It != Args.end(); ++It) {
82 (*It)->print(
OS, MAI,
false);
83 if ((It + 1) != Args.end())
95 return std::max(Arg1, Arg2);
101bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs(
MCValue &Res,
104 auto TryGetMCExprValue = [&](
const MCExpr *Arg,
uint64_t &ConstantValue) {
115 "AMDGPUVariadic Argument count incorrect for ExtraSGPRs");
117 uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;
119 bool Success = TryGetMCExprValue(Args[2], XNACKUsed);
121 assert(
Success &&
"Arguments 3 for ExtraSGPRs should be a known constant");
122 if (!
Success || !TryGetMCExprValue(Args[0], VCCUsed) ||
123 !TryGetMCExprValue(Args[1], FlatScrUsed))
127 STI, (
bool)VCCUsed, (
bool)FlatScrUsed, (
bool)XNACKUsed);
132bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR(
MCValue &Res,
135 auto TryGetMCExprValue = [&](
const MCExpr *Arg,
uint64_t &ConstantValue) {
145 "AMDGPUVariadic Argument count incorrect for TotalNumVGPRs");
151 if (!TryGetMCExprValue(Args[0], NumAGPR) ||
152 !TryGetMCExprValue(Args[1], NumVGPR))
155 uint64_t TotalNum = Has90AInsts && NumAGPR ?
alignTo(NumVGPR, 4) + NumAGPR
156 : std::max(NumVGPR, NumAGPR);
161bool AMDGPUVariadicMCExpr::evaluateAlignTo(
MCValue &Res,
164 auto TryGetMCExprValue = [&](
const MCExpr *Arg,
uint64_t &ConstantValue) {
175 "AMDGPUVariadic Argument count incorrect for AlignTo");
177 if (!TryGetMCExprValue(Args[0],
Value) || !TryGetMCExprValue(Args[1],
Align))
184bool AMDGPUVariadicMCExpr::evaluateOccupancy(
MCValue &Res,
187 auto TryGetMCExprValue = [&](
const MCExpr *Arg,
uint64_t &ConstantValue) {
197 "AMDGPUVariadic Argument count incorrect for Occupancy");
198 uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
202 Success &= TryGetMCExprValue(Args[0], MaxWaves);
203 Success &= TryGetMCExprValue(Args[1], Granule);
204 Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
205 Success &= TryGetMCExprValue(Args[3], Generation);
206 Success &= TryGetMCExprValue(Args[4], InitOccupancy);
208 assert(
Success &&
"Arguments 1 to 5 for Occupancy should be known constants");
210 if (!
Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
211 !TryGetMCExprValue(Args[6], NumVGPRs))
214 unsigned Occupancy = InitOccupancy;
216 Occupancy = std::min(
221 Occupancy = std::min(Occupancy,
223 NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
231 std::optional<int64_t>
Total;
237 return evaluateExtraSGPRs(Res, Layout,
Fixup);
239 return evaluateAlignTo(Res, Layout,
Fixup);
241 return evaluateTotalNumVGPR(Res, Layout,
Fixup);
243 return evaluateOccupancy(Res, Layout,
Fixup);
246 for (
const MCExpr *Arg : Args) {
252 if (!
Total.has_value())
262 for (
const MCExpr *Arg : Args)
267 for (
const MCExpr *Arg : Args) {
280 const MCExpr *FlatScrUsed,
307 auto CreateExpr = [&Ctx](
unsigned Value) {
312 {CreateExpr(MaxWaves), CreateExpr(Granule),
313 CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation),
314 CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
This file defines the BumpPtrAllocator interface.
AMD GCN specific subclass of TargetSubtarget.
PowerPC TLS Dynamic Call Fixup
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
AMDGPU target specific variadic MCExpr operations.
static const AMDGPUVariadicMCExpr * create(VariadicKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUVariadicMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUVariadicMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override
MCFragment * findAssociatedFragment() const override
const MCExpr * getSubExpr(size_t Index) const
static const AMDGPUVariadicMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
void visitUsedExpr(MCStreamer &Streamer) const override
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Generation getGeneration() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
Encapsulates the layout of an assembly file at a particular point in time.
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
void * allocate(unsigned Size, unsigned Align=8)
void deallocate(void *Ptr)
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
bool evaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const
Try to evaluate the expression to a relocatable value, i.e.
MCFragment * findAssociatedFragment() const
Find the "associated section" for this expression, which is currently defined as the absolute section...
Encode information on a single operation to perform on a byte sequence (e.g., an encoded instruction)...
Streaming machine code generation interface.
void visitUsedExpr(const MCExpr &Expr)
Generic base class for all target subtargets.
This represents an "assembler immediate".
int64_t getConstant() const
static MCValue get(const MCSymbolRefExpr *SymA, const MCSymbolRefExpr *SymB=nullptr, int64_t Val=0, uint32_t RefKind=0)
bool isAbsolute() const
Is this an absolute (as opposed to relocatable) value.
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
bool isGFX90A(const MCSubtargetInfo &STI)
This is an optimization pass for GlobalISel generic memory operations.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
This struct is a compact representation of a valid (non-zero power of two) alignment.