LLVM 18.0.0git
AMDGPUMachineFunction.cpp
Go to the documentation of this file.
//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
10#include "AMDGPU.h"
12#include "AMDGPUSubtarget.h"
16#include "llvm/IR/Constants.h"
17#include "llvm/IR/Metadata.h"
19
20using namespace llvm;
21
23 const AMDGPUSubtarget &ST)
24 : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
25 IsModuleEntryFunction(
26 AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
27 IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())),
28 NoSignedZerosFPMath(false) {
29
30 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
31 // except reserved size is not correctly aligned.
32
33 Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
34 MemoryBound = MemBoundAttr.getValueAsBool();
35
36 Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
37 WaveLimiter = WaveLimitAttr.getValueAsBool();
38
39 // FIXME: How is this attribute supposed to interact with statically known
40 // global sizes?
41 StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
42 if (!S.empty())
44
45 // Assume the attribute allocates before any known GDS globals.
47
48 // Second value, if present, is the maximum value that can be assigned.
49 // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
50 // during codegen.
51 std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
52 F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
53
54 // The two separate variables are only profitable when the LDS module lowering
55 // pass is disabled. If graphics does not use dynamic LDS, this is never
56 // profitable. Leaving cleanup for a later change.
57 LDSSize = LDSSizeRange.first;
59
60 CallingConv::ID CC = F.getCallingConv();
62 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
63
64 // FIXME: Shouldn't be target specific
65 Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
67 NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
68}
69
71 const GlobalVariable &GV,
72 Align Trailing) {
73 auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
74 if (!Entry.second)
75 return Entry.first->second;
76
77 Align Alignment =
78 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
79
80 unsigned Offset;
82
83 std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
84 if (MaybeAbs) {
85 // Absolute address LDS variables that exist prior to the LDS lowering
86 // pass raise a fatal error in that pass. These failure modes are only
87 // reachable if that lowering pass is disabled or broken. If/when adding
88 // support for absolute addresses on user specified variables, the
89 // alignment check moves to the lowering pass and the frame calculation
90 // needs to take the user variables into consideration.
91
92 uint32_t ObjectStart = *MaybeAbs;
93
94 if (ObjectStart != alignTo(ObjectStart, Alignment)) {
95 report_fatal_error("Absolute address LDS variable inconsistent with "
96 "variable alignment");
97 }
98
100 // If this is a module entry function, we can also sanity check against
101 // the static frame. Strictly it would be better to check against the
102 // attribute, i.e. that the variable is within the always-allocated
103 // section, and not within some other non-absolute-address object
104 // allocated here, but the extra error detection is minimal and we would
105 // have to pass the Function around or cache the attribute value.
106 uint32_t ObjectEnd =
107 ObjectStart + DL.getTypeAllocSize(GV.getValueType());
108 if (ObjectEnd > StaticLDSSize) {
110 "Absolute address LDS variable outside of static frame");
111 }
112 }
113
114 Entry.first->second = ObjectStart;
115 return ObjectStart;
116 }
117
118 /// TODO: We should sort these to minimize wasted space due to alignment
119 /// padding. Currently the padding is decided by the first encountered use
120 /// during lowering.
122
123 StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
124
125 // Align LDS size to trailing, e.g. for aligning dynamic shared memory
126 LDSSize = alignTo(StaticLDSSize, Trailing);
127 } else {
129 "expected region address space");
130
132 StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
133
134 // FIXME: Apply alignment of dynamic GDS
136 }
137
138 Entry.first->second = Offset;
139 return Offset;
140}
141
142static const GlobalVariable *
144 const Module *M = F.getParent();
145 std::string KernelDynLDSName = "llvm.amdgcn.";
146 KernelDynLDSName += F.getName();
147 KernelDynLDSName += ".dynlds";
148 return M->getNamedGlobal(KernelDynLDSName);
149}
150
151std::optional<uint32_t>
153 // TODO: Would be more consistent with the abs symbols to use a range
154 MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
155 if (MD && MD->getNumOperands() == 1) {
156 if (ConstantInt *KnownSize =
157 mdconst::extract<ConstantInt>(MD->getOperand(0))) {
158 uint64_t ZExt = KnownSize->getZExtValue();
159 if (ZExt <= UINT32_MAX) {
160 return ZExt;
161 }
162 }
163 }
164 return {};
165}
166
167std::optional<uint32_t>
170 return {};
171
172 std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
173 if (!AbsSymRange)
174 return {};
175
176 if (const APInt *V = AbsSymRange->getSingleElement()) {
177 std::optional<uint64_t> ZExt = V->tryZExtValue();
178 if (ZExt && (*ZExt <= UINT32_MAX)) {
179 return *ZExt;
180 }
181 }
182
183 return {};
184}
185
187 const GlobalVariable &GV) {
188 const Module *M = F.getParent();
189 const DataLayout &DL = M->getDataLayout();
190 assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
191
192 Align Alignment =
193 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
194 if (Alignment <= DynLDSAlign)
195 return;
196
197 LDSSize = alignTo(StaticLDSSize, Alignment);
198 DynLDSAlign = Alignment;
199
200 // If there is a dynamic LDS variable associated with this function F, every
201 // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
202 // map to the same address. This holds because no LDS is allocated after the
203 // lowering pass if there are dynamic LDS variables present.
205 if (Dyn) {
206 unsigned Offset = LDSSize; // return this?
207 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
208 if (!Expect || (Offset != *Expect)) {
209 report_fatal_error("Inconsistent metadata on dynamic LDS variable");
210 }
211 }
212}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const GlobalVariable * getKernelDynLDSGlobalFromFunction(const Function &F)
Analyzes if a function is potentially memory bound and if a kernel may benefit from limiting numb...
Base class for AMDGPU specific classes of TargetSubtarget.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define F(x, y, z)
Definition: MD5.cpp:55
This file contains the declarations for metadata subclasses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST)
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
Align DynLDSAlign
Align for dynamic shared memory if any.
uint32_t LDSSize
Number of bytes in the LDS that are being used.
void setDynLDSAlign(const Function &F, const GlobalVariable &GV)
static std::optional< uint32_t > getLDSAbsoluteAddress(const GlobalValue &GV)
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)
uint32_t StaticLDSSize
Number of bytes in the LDS allocated statically.
Class for arbitrary precision integers.
Definition: APInt.h:76
bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
Definition: Attributes.cpp:282
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:304
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:318
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
unsigned getAddressSpace() const
Definition: GlobalValue.h:201
std::optional< ConstantRange > getAbsoluteSymbolRange() const
If this is an absolute symbol reference, returns the range of the symbol, otherwise returns std::null...
Definition: Globals.cpp:380
Type * getValueType() const
Definition: GlobalValue.h:292
Metadata node.
Definition: Metadata.h:1037
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1391
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1397
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:503
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:408
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:411
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:197
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39