17#include "llvm/IR/IntrinsicsAMDGPU.h"
21#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
47 AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
58bool AMDGPULowerKernelArguments::runOnFunction(
Function &
F) {
63 auto &TPC = getAnalysis<TargetPassConfig>();
72 const Align KernArgBaseAlign(16);
73 const uint64_t BaseOffset =
ST.getExplicitKernelArgOffset(
F);
77 const uint64_t TotalKernArgSize =
ST.getKernArgSegmentSize(
F, MaxAlign);
78 if (TotalKernArgSize == 0)
82 Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
83 nullptr,
F.getName() +
".kernarg.segment");
85 KernArgSegment->
addRetAttr(Attribute::NonNull);
93 const bool IsByRef =
Arg.hasByRefAttr();
94 Type *ArgTy = IsByRef ?
Arg.getParamByRefType() :
Arg.getType();
95 MaybeAlign ParamAlign = IsByRef ?
Arg.getParamAlign() : std::nullopt;
96 Align ABITypeAlign =
DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
99 uint64_t AllocSize =
DL.getTypeAllocSize(ArgTy);
101 uint64_t EltOffset =
alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
102 ExplicitArgOffset =
alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
110 Value *ArgOffsetPtr =
Builder.CreateConstInBoundsGEP1_64(
111 Builder.getInt8Ty(), KernArgSegment, EltOffset,
112 Arg.getName() +
".byval.kernarg.offset");
114 Value *CastOffsetPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
115 ArgOffsetPtr,
Arg.getType());
116 Arg.replaceAllUsesWith(CastOffsetPtr);
120 if (
PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
127 !
ST.hasUsableDSOffset())
132 if (
Arg.hasNoAliasAttr())
136 auto *VT = dyn_cast<FixedVectorType>(ArgTy);
137 bool IsV3 = VT && VT->getNumElements() == 3;
142 int64_t AlignDownOffset =
alignDown(EltOffset, 4);
143 int64_t OffsetDiff = EltOffset - AlignDownOffset;
145 KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);
156 ArgPtr =
Builder.CreateConstInBoundsGEP1_64(
157 Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
158 Arg.getName() +
".kernarg.offset.align.down");
159 AdjustedArgTy =
Builder.getInt32Ty();
161 ArgPtr =
Builder.CreateConstInBoundsGEP1_64(
162 Builder.getInt8Ty(), KernArgSegment, EltOffset,
163 Arg.getName() +
".kernarg.offset");
164 AdjustedArgTy = ArgTy;
167 if (IsV3 &&
Size >= 32) {
170 AdjustedArgTy = V4Ty;
176 Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
177 Load->setMetadata(LLVMContext::MD_invariant_load,
MDNode::get(Ctx, {}));
181 if (isa<PointerType>(ArgTy)) {
182 if (
Arg.hasNonNullAttr())
185 uint64_t DerefBytes =
Arg.getDereferenceableBytes();
186 if (DerefBytes != 0) {
188 LLVMContext::MD_dereferenceable,
194 uint64_t DerefOrNullBytes =
Arg.getDereferenceableOrNullBytes();
195 if (DerefOrNullBytes != 0) {
197 LLVMContext::MD_dereferenceable_or_null,
200 DerefOrNullBytes))));
205 LLVMContext::MD_align,
207 Builder.getInt64Ty(), ParamAlign->value()))));
214 Value *ExtractBits = OffsetDiff == 0 ?
218 Value *Trunc =
Builder.CreateTrunc(ExtractBits, ArgIntTy);
220 Arg.getName() +
".load");
221 Arg.replaceAllUsesWith(NewVal);
224 Arg.getName() +
".load");
225 Arg.replaceAllUsesWith(Shuf);
227 Load->setName(
Arg.getName() +
".load");
228 Arg.replaceAllUsesWith(Load);
239 "AMDGPU Lower Kernel Arguments",
false,
false)
243char AMDGPULowerKernelArguments::
ID = 0;
246 return new AMDGPULowerKernelArguments();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
AMDGPU Lower Kernel Arguments
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
an instruction to allocate memory on the stack
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
InstListType::iterator iterator
Instruction iterators...
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
A parsed version of the target data layout string, and methods for querying it.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isAggregateType() const
Return true if the type is an aggregate type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPULowerKernelArgumentsPass()
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and is congruent to Skew modulo Align.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.