LLVM  14.0.0git
AMDGPUMachineFunction.cpp
Go to the documentation of this file.
1 //===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "AMDGPUPerfHintAnalysis.h"
11 #include "AMDGPUSubtarget.h"
14 
15 using namespace llvm;
16 
// Constructor of AMDGPUMachineFunction: remainder of the member-initializer
// list, followed by the body.
// NOTE(review): the listing lines that carry the signature and the start of the
// initializer list (17-18) are absent from this extract, so only the visible
// lines are described.
//
// Visible behaviour:
//  - IsEntryFunction / IsModuleEntryFunction are derived from the calling
//    convention of the function wrapped by MF.
//  - NoSignedZerosFPMath is copied from the target options.
//  - MemoryBound / WaveLimiter are read as booleans from function attributes.
//  - ExplicitKernArgSize is obtained from the subtarget `ST`.
19  IsEntryFunction(
20  AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
21  IsModuleEntryFunction(
22  AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
23  NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
// NOTE(review): listing line 24 is missing. `ST`, used at the end of the body,
// is presumably declared there (likely via AMDGPUSubtarget::get(MF)) - confirm.
25 
26  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
27  // except reserved size is not correctly aligned.
28  const Function &F = MF.getFunction();
29 
// Boolean value of the "amdgpu-memory-bound" function attribute.
30  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
31  MemoryBound = MemBoundAttr.getValueAsBool();
32 
// Boolean value of the "amdgpu-wave-limiter" function attribute.
33  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
34  WaveLimiter = WaveLimitAttr.getValueAsBool();
35 
36  CallingConv::ID CC = F.getCallingConv();
// NOTE(review): listing line 37 is missing. `CC` is not used on any visible
// line, so the assignment below is presumably guarded by a calling-convention
// check on the missing line - confirm against the real source.
38  ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
39 }
40 
// Tail of the signature and body of AMDGPUMachineFunction::allocateLDSGlobal.
// NOTE(review): the first signature line (listing line 41) is absent from this
// extract.
//
// Returns the offset recorded for GV in LocalMemoryObjects. On the first
// request for a given GV, the offset is StaticLDSSize rounded up to GV's
// alignment, and StaticLDSSize then grows by GV's alloc size. Later requests
// for the same GV return the recorded offset without allocating again.
42  const GlobalVariable &GV) {
// Tentatively register GV with offset 0; insert() reports whether it was new.
43  auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
// Already registered: hand back the offset stored earlier.
44  if (!Entry.second)
45  return Entry.first->second;
46 
// Judging by the helper's name, this is GV's explicit alignment when set and
// otherwise the ABI alignment of its value type - TODO confirm.
47  Align Alignment =
48  DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
49 
50  /// TODO: We should sort these to minimize wasted space due to alignment
51  /// padding. Currently the padding is decided by the first encountered use
52  /// during lowering.
// The chained assignment first pads StaticLDSSize up to Alignment, then uses
// that padded value as GV's offset.
53  unsigned Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
54 
// Replace the placeholder 0 with the real offset, then reserve GV's storage.
55  Entry.first->second = Offset;
56  StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
57 
58  // Update the LDS size considering the padding to align the dynamic shared
59  // memory.
// NOTE(review): listing line 60, which the comment above describes, is missing
// from this extract.
61 
62  return Offset;
63 }
64 
// Body of AMDGPUMachineFunction::allocateModuleLDSGlobal(const Module *M).
// NOTE(review): the signature line (listing line 65) is absent from this
// extract.
//
// For module entry functions only: looks up the global named
// "llvm.amdgcn.module.lds" in M and, if it exists, allocates it through
// allocateLDSGlobal. The assert checks that it was placed at offset 0.
66  if (isModuleEntryFunction()) {
67  const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
68  if (GV) {
69  unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
// Keeps Offset "used" in builds where assert() expands to nothing.
70  (void)Offset;
71  assert(Offset == 0 &&
72  "Module LDS expected to be allocated before other LDS");
73  }
74  }
75 }
76 
// Tail of the signature and body of AMDGPUMachineFunction::setDynLDSAlign.
// NOTE(review): the first signature line (listing line 77) is absent from this
// extract.
//
// Raises DynLDSAlign to GV's alignment when that alignment is larger, and
// recomputes LDSSize as StaticLDSSize rounded up to the new alignment.
// Does nothing when GV's alignment does not exceed the current DynLDSAlign.
78  const GlobalVariable &GV) {
// Callers must pass a global whose value type has zero alloc size.
79  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
80 
81  Align Alignment =
82  DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
// The current alignment already satisfies this global; nothing to update.
83  if (Alignment <= DynLDSAlign)
84  return;
85 
// LDSSize is StaticLDSSize padded up to the stricter alignment.
86  LDSSize = alignTo(StaticLDSSize, Alignment);
87  DynLDSAlign = Alignment;
88 }
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::AMDGPUMachineFunction::allocateLDSGlobal
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.cpp:41
llvm
----------------------- PointerInfo --------------------------------------
Definition: AllocatorList.h:23
M
We currently emit: […] eax. Perhaps this is what we really should generate? Is imull three or four cycles? […] eax, eax. The current instruction priority is based on pattern complexity. The former is more complex because it folds a load, so the latter will not be emitted. Perhaps we should use AddedComplexity to give LEA32r a higher priority? We should always try to match LEA first, since the LEA matching code does some estimation to determine whether the match is profitable. If we care more about code size, then imull is better: it's two bytes shorter than movl + leal. On a Pentium M…
Definition: README.txt:252
llvm::AMDGPUMachineFunction::ExplicitKernArgSize
uint64_t ExplicitKernArgSize
Definition: AMDGPUMachineFunction.h:26
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:112
llvm::Function
Definition: Function.h:61
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:255
llvm::Attribute
Definition: Attributes.h:52
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
llvm::AMDGPUMachineFunction::AMDGPUMachineFunction
AMDGPUMachineFunction(const MachineFunction &MF)
Definition: AMDGPUMachineFunction.cpp:17
llvm::GlobalVariable
Definition: GlobalVariable.h:40
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::AMDGPUMachineFunction::LDSSize
unsigned LDSSize
Number of bytes in the LDS that are being used.
Definition: AMDGPUMachineFunction.h:30
llvm::AMDGPUMachineFunction::WaveLimiter
bool WaveLimiter
Definition: AMDGPUMachineFunction.h:59
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:287
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:1111
TargetMachine.h
llvm::AMDGPUMachineFunction::StaticLDSSize
unsigned StaticLDSSize
Number of bytes in the LDS allocated statically.
Definition: AMDGPUMachineFunction.h:34
AMDGPUMachineFunction.h
AMDGPUSubtarget.h
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1381
llvm::AMDGPUMachineFunction::MemoryBound
bool MemoryBound
Definition: AMDGPUMachineFunction.h:56
llvm::AMDGPUMachineFunction::allocateModuleLDSGlobal
void allocateModuleLDSGlobal(const Module *M)
Definition: AMDGPUMachineFunction.cpp:65
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
AMDGPUPerfHintAnalysis.h
Analyzes whether a function is potentially memory bound and whether a kernel may benefit from limiting numb...
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
MachineModuleInfo.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::AMDGPU::isModuleEntryFunctionCC
bool isModuleEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1398
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:592
llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:82
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
llvm::MachineFunctionInfo
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
Definition: MachineFunction.h:95
llvm::AMDGPUMachineFunction::MaxKernArgAlign
Align MaxKernArgAlign
Definition: AMDGPUMachineFunction.h:27
llvm::AMDGPUMachineFunction::DynLDSAlign
Align DynLDSAlign
Align for dynamic shared memory if any.
Definition: AMDGPUMachineFunction.h:41
llvm::AMDGPUMachineFunction::setDynLDSAlign
void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.cpp:77