24#include "llvm/IR/IntrinsicsAMDGPU.h" 
   29#define DEBUG_TYPE "amdgpu-lower-kernel-attributes" 
   36enum DispatchPackedOffsets {
 
   47enum ImplicitArgOffsets {
 
   48  HIDDEN_BLOCK_COUNT_X = 0,
 
   49  HIDDEN_BLOCK_COUNT_Y = 4,
 
   50  HIDDEN_BLOCK_COUNT_Z = 8,
 
   52  HIDDEN_GROUP_SIZE_X = 12,
 
   53  HIDDEN_GROUP_SIZE_Y = 14,
 
   54  HIDDEN_GROUP_SIZE_Z = 16,
 
   56  HIDDEN_REMAINDER_X = 18,
 
   57  HIDDEN_REMAINDER_Y = 20,
 
   58  HIDDEN_REMAINDER_Z = 22,
 
   61class AMDGPULowerKernelAttributes : 
public ModulePass {
 
   67  bool runOnModule(
Module &M) 
override;
 
   70    return "AMDGPU Kernel Attributes";
 
   79  auto IntrinsicId = IsV5OrAbove ? Intrinsic::amdgcn_implicitarg_ptr
 
   80                                 : Intrinsic::amdgcn_dispatch_ptr;
 
   88  if (MaxNumGroups == 0 || MaxNumGroups == std::numeric_limits<uint32_t>::max())
 
   91  if (!Load->getType()->isIntegerTy(32))
 
   97  Load->setMetadata(LLVMContext::MD_range, 
Range);
 
 
  103  auto *MD = 
F->getMetadata(
"reqd_work_group_size");
 
  104  const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3;
 
  106  const bool HasUniformWorkGroupSize =
 
  107    F->getFnAttribute(
"uniform-work-group-size").getValueAsBool();
 
  113  if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize &&
 
  114      none_of(MaxNumWorkgroups, [](
unsigned X) { 
return X != 0; }))
 
  117  Value *BlockCounts[3] = {
nullptr, 
nullptr, 
nullptr};
 
  118  Value *GroupSizes[3]  = {
nullptr, 
nullptr, 
nullptr};
 
  119  Value *Remainders[3]  = {
nullptr, 
nullptr, 
nullptr};
 
  120  Value *GridSizes[3]   = {
nullptr, 
nullptr, 
nullptr};
 
  141      if (!BCI->hasOneUse())
 
  146    if (!Load || !Load->isSimple())
 
  149    unsigned LoadSize = 
DL.getTypeStoreSize(Load->getType());
 
  154      case HIDDEN_BLOCK_COUNT_X:
 
  156          BlockCounts[0] = Load;
 
  160      case HIDDEN_BLOCK_COUNT_Y:
 
  162          BlockCounts[1] = Load;
 
  166      case HIDDEN_BLOCK_COUNT_Z:
 
  168          BlockCounts[2] = Load;
 
  172      case HIDDEN_GROUP_SIZE_X:
 
  174          GroupSizes[0] = Load;
 
  176      case HIDDEN_GROUP_SIZE_Y:
 
  178          GroupSizes[1] = Load;
 
  180      case HIDDEN_GROUP_SIZE_Z:
 
  182          GroupSizes[2] = Load;
 
  184      case HIDDEN_REMAINDER_X:
 
  186          Remainders[0] = Load;
 
  188      case HIDDEN_REMAINDER_Y:
 
  190          Remainders[1] = Load;
 
  192      case HIDDEN_REMAINDER_Z:
 
  194          Remainders[2] = Load;
 
  201      case WORKGROUP_SIZE_X:
 
  203          GroupSizes[0] = Load;
 
  205      case WORKGROUP_SIZE_Y:
 
  207          GroupSizes[1] = Load;
 
  209      case WORKGROUP_SIZE_Z:
 
  211          GroupSizes[2] = Load;
 
  231  bool MadeChange = 
false;
 
  232  if (IsV5OrAbove && HasUniformWorkGroupSize) {
 
  240    for (
int I = 0; 
I < 3; ++
I) {
 
  241      Value *BlockCount = BlockCounts[
I];
 
  261    for (
Value *Remainder : Remainders) {
 
  267  } 
else if (HasUniformWorkGroupSize) { 
 
  287    for (
int I = 0; 
I < 3; ++
I) {
 
  288      Value *GroupSize = GroupSizes[
I];
 
  289      Value *GridSize = GridSizes[
I];
 
  290      if (!GroupSize || !GridSize)
 
  304        for (
User *
UMin : ZextGroupSize->users()) {
 
  309            if (HasReqdWorkGroupSize) {
 
  313                  KnownSize, 
UMin->getType(), 
false, 
DL));
 
  315              UMin->replaceAllUsesWith(ZextGroupSize);
 
  326  if (!HasReqdWorkGroupSize)
 
  329  for (
int I = 0; 
I < 3; 
I++) {
 
  330    Value *GroupSize = GroupSizes[
I];
 
 
  346bool AMDGPULowerKernelAttributes::runOnModule(
Module &M) {
 
  347  bool MadeChange = 
false;
 
  355  SmallPtrSet<Instruction *, 4> HandledUses;
 
  356  for (
auto *U : 
BasePtr->users()) {
 
  358    if (HandledUses.
insert(CI).second) {
 
  369                      "AMDGPU Kernel Attributes", 
false, 
false)
 
  373char AMDGPULowerKernelAttributes::
ID = 0;
 
  376  return new AMDGPULowerKernelAttributes();
 
 
  383  Function *BasePtr = getBasePtrIntrinsic(*
F.getParent(), IsV5OrAbove);
 
 
static void annotateGridSizeLoadWithRangeMD(LoadInst *Load, uint32_t MaxNumGroups)
static bool processUse(CallInst *CI, bool IsV5OrAbove)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Class for arbitrary precision integers.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature, or the call target is an alias.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
const ParentTy * getParent() const
unsigned getAMDHSACodeObjectVersion(const Module &M)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
ModulePass * createAMDGPULowerKernelAttributesPass()
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)