23 #define DEBUG_TYPE "amdgpu-subtarget"
25 #define GET_SUBTARGETINFO_ENUM
26 #define GET_SUBTARGETINFO_TARGET_DESC
27 #define GET_SUBTARGETINFO_CTOR
28 #include "AMDGPUGenSubtargetInfo.inc"
46 FullFS +=
"+flat-for-global,+unaligned-buffer-access,";
78 Gen(TT.getArch() ==
Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
79 IsaVersion(ISAVersion0_0_0),
83 MaxPrivateElementSize(0),
93 UnalignedScratchAccess(
false),
94 UnalignedBufferAccess(
false),
97 DebuggerInsertNops(
false),
98 DebuggerReserveRegs(
false),
99 DebuggerEmitPrologue(
false),
101 EnableVGPRSpilling(
false),
102 EnablePromoteAlloca(
false),
104 EnableUnsafeDSOffsetFolding(
false),
105 EnableSIScheduler(
false),
114 HasSMemRealTime(
false),
115 Has16BitInsts(
false),
117 HasVGPRIndexMode(
false),
118 HasScalarStores(
false),
119 HasInv2PiInlineImm(
false),
120 FlatAddressSpace(
false),
125 HasVertexCache(
false),
129 FeatureDisable(
false),
130 InstrItins(getInstrItineraryForCPU(GPU)) {
195 std::pair<unsigned, unsigned>
Default =
204 F,
"amdgpu-max-work-group-size", Default.second);
205 Default.first =
std::min(Default.first, Default.second);
209 F,
"amdgpu-flat-work-group-size", Default);
212 if (Requested.first > Requested.second)
227 std::pair<unsigned, unsigned>
Default(1, 0);
236 unsigned MinImpliedByFlatWorkGroupSize =
238 bool RequestedFlatWorkGroupSize =
false;
244 Default.first = MinImpliedByFlatWorkGroupSize;
245 RequestedFlatWorkGroupSize =
true;
250 F,
"amdgpu-waves-per-eu", Default,
true);
253 if (Requested.second && Requested.first > Requested.second)
265 if (RequestedFlatWorkGroupSize &&
266 Requested.first > MinImpliedByFlatWorkGroupSize)
287 unsigned NumRegionInstrs)
const {
308 unsigned ExplicitArgBytes)
const {
310 if (ImplicitBytes == 0)
311 return ExplicitArgBytes;
314 return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
AMDGPU specific subclass of TargetSubtarget.
bool isVGPRSpillingEnabled(const Function &F) const
AMDGPUSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Align.
int getLocalMemorySize() const
unsigned getImplicitArgNumBytes(const MachineFunction &MF) const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool enableSIScheduler() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
SISubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
Function Alias Analysis false
Generation getGeneration() const
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
bool hasSGPRInitBug() const
unsigned getMaxWavesPerEU() const
bool isShader(CallingConv::ID cc)
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
unsigned getMinFlatWorkGroupSize() const
bool isCompute(CallingConv::ID cc)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(false), cl::Hidden)
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg...
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxFlatWorkGroupSize() const
Triple - Helper class for working with autoconf configuration names.
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const
Inverse of getMaxLocalMemSizeWithWaveCount.
unsigned getMinWavesPerEU() const
Information about stack frame layout on the target.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
~AMDGPUSubtarget() override
unsigned getWavefrontSize() const
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair"" optimization pass"), cl::init(true), cl::Hidden)
unsigned MaxPrivateElementSize
Primary interface to the complete machine description for the target machine.
StringRef - Represent a constant reference to a string, i.e.
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getMaxNumSGPRs() const
unsigned getAlignmentForImplicitArgPtr() const