1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
15#include "Utils/AMDGPUBaseInfo.h"
16#include "llvm/Analysis/CycleAnalysis.h"
17#include "llvm/CodeGen/TargetPassConfig.h"
18#include "llvm/IR/IntrinsicsAMDGPU.h"
19#include "llvm/IR/IntrinsicsR600.h"
20#include "llvm/Target/TargetMachine.h"
21#include "llvm/Transforms/IPO/Attributor.h"
22
23#define DEBUG_TYPE "amdgpu-attributor"
24
25namespace llvm {
26void initializeCycleInfoWrapperPassPass(PassRegistry &);
27} // namespace llvm
28
29using namespace llvm;
30
31static cl::opt<unsigned> KernargPreloadCount(
32 "amdgpu-kernarg-preload-count",
33 cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
34
35static cl::opt<unsigned> IndirectCallSpecializationThreshold(
36 "amdgpu-indirect-call-specialization-threshold",
37 cl::desc(
38 "A threshold controls whether an indirect call will be specialized"),
39 cl::init(3));
40
41#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
42
43enum ImplicitArgumentPositions {
44#include "AMDGPUAttributes.def"
45 LAST_ARG_POS
46};
47
48#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
49
50enum ImplicitArgumentMask {
51 NOT_IMPLICIT_INPUT = 0,
52#include "AMDGPUAttributes.def"
53 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
54};
55
56#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
57static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
58 ImplicitAttrs[] = {
59#include "AMDGPUAttributes.def"
60};
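// AMDGPUAttributes.def supplies one AMDGPU_ATTRIBUTE(Name, Str) entry per
// implicit argument, so the three expansions above produce, in order, the bit
// positions, the bit masks, and this mask-to-attribute-string table (e.g. an
// entry pairing DISPATCH_PTR with "amdgpu-no-dispatch-ptr").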
61
62// We do not need to note the x workitem or workgroup id because they are always
63// initialized.
64//
65// TODO: We should not add the attributes if the known compile time workgroup
66// size is 1 for y/z.
67static ImplicitArgumentMask
68intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
69 bool HasApertureRegs, bool SupportsGetDoorBellID,
70 unsigned CodeObjectVersion) {
71 switch (ID) {
72 case Intrinsic::amdgcn_workitem_id_x:
73 NonKernelOnly = true;
74 return WORKITEM_ID_X;
75 case Intrinsic::amdgcn_workgroup_id_x:
76 NonKernelOnly = true;
77 return WORKGROUP_ID_X;
78 case Intrinsic::amdgcn_workitem_id_y:
79 case Intrinsic::r600_read_tidig_y:
80 return WORKITEM_ID_Y;
81 case Intrinsic::amdgcn_workitem_id_z:
82 case Intrinsic::r600_read_tidig_z:
83 return WORKITEM_ID_Z;
84 case Intrinsic::amdgcn_workgroup_id_y:
85 case Intrinsic::r600_read_tgid_y:
86 return WORKGROUP_ID_Y;
87 case Intrinsic::amdgcn_workgroup_id_z:
88 case Intrinsic::r600_read_tgid_z:
89 return WORKGROUP_ID_Z;
90 case Intrinsic::amdgcn_lds_kernel_id:
91 return LDS_KERNEL_ID;
92 case Intrinsic::amdgcn_dispatch_ptr:
93 return DISPATCH_PTR;
94 case Intrinsic::amdgcn_dispatch_id:
95 return DISPATCH_ID;
96 case Intrinsic::amdgcn_implicitarg_ptr:
97 return IMPLICIT_ARG_PTR;
98 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
99 // queue_ptr.
100 case Intrinsic::amdgcn_queue_ptr:
101 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
102 return QUEUE_PTR;
103 case Intrinsic::amdgcn_is_shared:
104 case Intrinsic::amdgcn_is_private:
105 if (HasApertureRegs)
106 return NOT_IMPLICIT_INPUT;
107 // Under V5, we need implicitarg_ptr + offsets to access private_base or
108 // shared_base. For pre-V5, however, we need to access them through
109 // queue_ptr + offsets.
110 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
111 : QUEUE_PTR;
112 case Intrinsic::trap:
113 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
114 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
115 : QUEUE_PTR;
116 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
117 return QUEUE_PTR;
118 default:
119 return NOT_IMPLICIT_INPUT;
120 }
121}
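// Example: a call to llvm.amdgcn.workitem.id.y maps to WORKITEM_ID_Y with
// NonKernelOnly left false, so the caller below clears that assumed bit and
// the function does not receive "amdgpu-no-workitem-id-y".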
122
123static bool castRequiresQueuePtr(unsigned SrcAS) {
124 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
125}
126
127static bool isDSAddress(const Constant *C) {
128 const GlobalValue *GV = dyn_cast<GlobalValue>(C);
129 if (!GV)
130 return false;
131 unsigned AS = GV->getAddressSpace();
132 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
133}
134
135/// Returns true if the function requires the implicit argument be passed
136/// regardless of the function contents.
137static bool funcRequiresHostcallPtr(const Function &F) {
138 // Sanitizers require the hostcall buffer passed in the implicit arguments.
139 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
140 F.hasFnAttribute(Attribute::SanitizeThread) ||
141 F.hasFnAttribute(Attribute::SanitizeMemory) ||
142 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
143 F.hasFnAttribute(Attribute::SanitizeMemTag);
144}
145
146namespace {
147class AMDGPUInformationCache : public InformationCache {
148public:
149 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
150 BumpPtrAllocator &Allocator,
151 SetVector<Function *> *CGSCC, TargetMachine &TM)
152 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
153 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
154
155 TargetMachine &TM;
156
157 enum ConstantStatus : uint8_t {
158 NONE = 0,
159 DS_GLOBAL = 1 << 0,
160 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
161 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
162 ADDR_SPACE_CAST_BOTH_TO_FLAT =
163 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
164 };
165
166 /// Check if the subtarget has aperture regs.
167 bool hasApertureRegs(Function &F) {
168 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
169 return ST.hasApertureRegs();
170 }
171
172 /// Check if the subtarget supports GetDoorbellID.
173 bool supportsGetDoorbellID(Function &F) {
174 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
175 return ST.supportsGetDoorbellID();
176 }
177
178 std::optional<std::pair<unsigned, unsigned>>
179 getFlatWorkGroupSizeAttr(const Function &F) const {
180 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
181 if (!R)
182 return std::nullopt;
183 return std::make_pair(R->first, *(R->second));
184 }
185
186 std::pair<unsigned, unsigned>
187 getDefaultFlatWorkGroupSize(const Function &F) const {
188 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
189 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
190 }
191
192 std::pair<unsigned, unsigned>
193 getMaximumFlatWorkGroupRange(const Function &F) {
194 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
195 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
196 }
197
198 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
199 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
200 return ST.getMaxNumWorkGroups(F);
201 }
202
203 /// Get code object version.
204 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
205
206 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
207 /// accounting for the interaction with the passed value to use for
208 /// "amdgpu-flat-work-group-size".
209 std::pair<unsigned, unsigned>
210 getWavesPerEU(const Function &F,
211 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
212 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
213 return ST.getWavesPerEU(F, FlatWorkGroupSize);
214 }
215
216 std::optional<std::pair<unsigned, unsigned>>
217 getWavesPerEUAttr(const Function &F) {
218 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
219 /*OnlyFirstRequired=*/true);
220 if (!Val)
221 return std::nullopt;
222 if (!Val->second) {
223 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
224 Val->second = ST.getMaxWavesPerEU();
225 }
226 return std::make_pair(Val->first, *(Val->second));
227 }
228
229 std::pair<unsigned, unsigned>
230 getEffectiveWavesPerEU(const Function &F,
231 std::pair<unsigned, unsigned> WavesPerEU,
232 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
233 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
234 return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
235 }
236
237 unsigned getMaxWavesPerEU(const Function &F) {
238 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
239 return ST.getMaxWavesPerEU();
240 }
241
242private:
243 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
244 /// local to flat. These casts may require the queue pointer.
245 static uint8_t visitConstExpr(const ConstantExpr *CE) {
246 uint8_t Status = NONE;
247
248 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
249 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
250 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
251 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
252 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
253 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
254 }
255
256 return Status;
257 }
258
259 /// Get the constant access bitmap for \p C.
260 uint8_t getConstantAccess(const Constant *C,
261 SmallPtrSetImpl<const Constant *> &Visited) {
262 auto It = ConstantStatus.find(C);
263 if (It != ConstantStatus.end())
264 return It->second;
265
266 uint8_t Result = 0;
267 if (isDSAddress(C))
268 Result = DS_GLOBAL;
269
270 if (const auto *CE = dyn_cast<ConstantExpr>(C))
271 Result |= visitConstExpr(CE);
272
273 for (const Use &U : C->operands()) {
274 const auto *OpC = dyn_cast<Constant>(U);
275 if (!OpC || !Visited.insert(OpC).second)
276 continue;
277
278 Result |= getConstantAccess(OpC, Visited);
279 }
280 return Result;
281 }
282
283public:
284 /// Returns true if \p Fn needs the queue pointer because of \p C.
285 bool needsQueuePtr(const Constant *C, Function &Fn) {
286 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
287 bool HasAperture = hasApertureRegs(Fn);
288
289 // No need to explore the constants.
290 if (!IsNonEntryFunc && HasAperture)
291 return false;
292
293 SmallPtrSet<const Constant *, 8> Visited;
294 uint8_t Access = getConstantAccess(C, Visited);
295
296 // We need to trap on DS globals in non-entry functions.
297 if (IsNonEntryFunc && (Access & DS_GLOBAL))
298 return true;
299
300 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
301 }
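// Example: an entry function on a subtarget with aperture registers returns
// false here without walking its constants, while a non-entry function whose
// constant expression addresses an LDS global returns true.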
302
303 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
304 SmallPtrSet<const Constant *, 8> Visited;
305 uint8_t Access = getConstantAccess(C, Visited);
306 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
307 }
308
309private:
310 /// Used to determine if the Constant needs the queue pointer.
311 DenseMap<const Constant *, uint8_t> ConstantStatus;
312 const unsigned CodeObjectVersion;
313};
314
315struct AAAMDAttributes
316 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
317 AbstractAttribute> {
318 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
319 AbstractAttribute>;
320
321 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
322
323 /// Create an abstract attribute view for the position \p IRP.
324 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
325 Attributor &A);
326
327 /// See AbstractAttribute::getName().
328 const std::string getName() const override { return "AAAMDAttributes"; }
329
330 /// See AbstractAttribute::getIdAddr().
331 const char *getIdAddr() const override { return &ID; }
332
333 /// This function should return true if the type of the \p AA is
334 /// AAAMDAttributes.
335 static bool classof(const AbstractAttribute *AA) {
336 return (AA->getIdAddr() == &ID);
337 }
338
339 /// Unique ID (due to the unique address)
340 static const char ID;
341};
342const char AAAMDAttributes::ID = 0;
343
344struct AAUniformWorkGroupSize
345 : public StateWrapper<BooleanState, AbstractAttribute> {
346 using Base = StateWrapper<BooleanState, AbstractAttribute>;
347 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
348
349 /// Create an abstract attribute view for the position \p IRP.
350 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
351 Attributor &A);
352
353 /// See AbstractAttribute::getName().
354 const std::string getName() const override {
355 return "AAUniformWorkGroupSize";
356 }
357
358 /// See AbstractAttribute::getIdAddr().
359 const char *getIdAddr() const override { return &ID; }
360
361 /// This function should return true if the type of the \p AA is
362 /// AAUniformWorkGroupSize.
363 static bool classof(const AbstractAttribute *AA) {
364 return (AA->getIdAddr() == &ID);
365 }
366
367 /// Unique ID (due to the unique address)
368 static const char ID;
369};
370const char AAUniformWorkGroupSize::ID = 0;
371
372struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
373 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
374 : AAUniformWorkGroupSize(IRP, A) {}
375
376 void initialize(Attributor &A) override {
377 Function *F = getAssociatedFunction();
378 CallingConv::ID CC = F->getCallingConv();
379
380 if (CC != CallingConv::AMDGPU_KERNEL)
381 return;
382
383 bool InitialValue = false;
384 if (F->hasFnAttribute("uniform-work-group-size"))
385 InitialValue =
386 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
387 "true";
388
389 if (InitialValue)
390 indicateOptimisticFixpoint();
391 else
392 indicatePessimisticFixpoint();
393 }
394
395 ChangeStatus updateImpl(Attributor &A) override {
396 ChangeStatus Change = ChangeStatus::UNCHANGED;
397
398 auto CheckCallSite = [&](AbstractCallSite CS) {
399 Function *Caller = CS.getInstruction()->getFunction();
400 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
401 << "->" << getAssociatedFunction()->getName() << "\n");
402
403 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
404 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
405 if (!CallerInfo || !CallerInfo->isValidState())
406 return false;
407
408 Change = Change | clampStateAndIndicateChange(this->getState(),
409 CallerInfo->getState());
410
411 return true;
412 };
413
414 bool AllCallSitesKnown = true;
415 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
416 return indicatePessimisticFixpoint();
417
418 return Change;
419 }
420
421 ChangeStatus manifest(Attributor &A) override {
422 SmallVector<Attribute, 8> AttrList;
423 LLVMContext &Ctx = getAssociatedFunction()->getContext();
424
425 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
426 getAssumed() ? "true" : "false"));
427 return A.manifestAttrs(getIRPosition(), AttrList,
428 /* ForceReplace */ true);
429 }
430
431 bool isValidState() const override {
432 // This state is always valid, even when the state is false.
433 return true;
434 }
435
436 const std::string getAsStr(Attributor *) const override {
437 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
438 }
439
440 /// See AbstractAttribute::trackStatistics()
441 void trackStatistics() const override {}
442};
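// The net effect is top-down propagation: kernels seed "uniform-work-group-size"
// from their own attribute, every reachable callee is clamped against its
// callers' state in updateImpl, and manifest() rewrites the attribute with the
// final assumed value.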
443
444AAUniformWorkGroupSize &
445AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
446 Attributor &A) {
447 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
448 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
449 llvm_unreachable(
450 "AAUniformWorkGroupSize is only valid for function position");
451}
452
453struct AAAMDAttributesFunction : public AAAMDAttributes {
454 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
455 : AAAMDAttributes(IRP, A) {}
456
457 void initialize(Attributor &A) override {
458 Function *F = getAssociatedFunction();
459
460 // If the function requires the implicit arg pointer due to sanitizers,
461 // assume it's needed even if explicitly marked as not requiring it.
462 const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
463 if (NeedsHostcall) {
464 removeAssumedBits(IMPLICIT_ARG_PTR);
465 removeAssumedBits(HOSTCALL_PTR);
466 }
467
468 for (auto Attr : ImplicitAttrs) {
469 if (NeedsHostcall &&
470 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
471 continue;
472
473 if (F->hasFnAttribute(Attr.second))
474 addKnownBits(Attr.first);
475 }
476
477 if (F->isDeclaration())
478 return;
479
480 // Ignore functions with graphics calling conventions; these are currently
481 // not allowed to have kernel arguments.
482 if (AMDGPU::isGraphics(F->getCallingConv())) {
483 indicatePessimisticFixpoint();
484 return;
485 }
486 }
487
488 ChangeStatus updateImpl(Attributor &A) override {
489 Function *F = getAssociatedFunction();
490 // The current assumed state used to determine a change.
491 auto OrigAssumed = getAssumed();
492
493 // Check for Intrinsics and propagate attributes.
494 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
495 *this, this->getIRPosition(), DepClassTy::REQUIRED);
496 if (!AAEdges || !AAEdges->isValidState() ||
497 AAEdges->hasNonAsmUnknownCallee())
498 return indicatePessimisticFixpoint();
499
500 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
501
502 bool NeedsImplicit = false;
503 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
504 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
505 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
506 unsigned COV = InfoCache.getCodeObjectVersion();
507
508 for (Function *Callee : AAEdges->getOptimisticEdges()) {
509 Intrinsic::ID IID = Callee->getIntrinsicID();
510 if (IID == Intrinsic::not_intrinsic) {
511 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
512 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
513 if (!AAAMD || !AAAMD->isValidState())
514 return indicatePessimisticFixpoint();
515 *this &= *AAAMD;
516 continue;
517 }
518
519 bool NonKernelOnly = false;
520 ImplicitArgumentMask AttrMask =
521 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
522 HasApertureRegs, SupportsGetDoorbellID, COV);
523 if (AttrMask != NOT_IMPLICIT_INPUT) {
524 if ((IsNonEntryFunc || !NonKernelOnly))
525 removeAssumedBits(AttrMask);
526 }
527 }
528
529 // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
530 if (NeedsImplicit)
531 removeAssumedBits(IMPLICIT_ARG_PTR);
532
533 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
534 // Under V5, we need implicitarg_ptr + offsets to access private_base or
535 // shared_base. We do not actually need queue_ptr.
536 if (COV >= 5)
537 removeAssumedBits(IMPLICIT_ARG_PTR);
538 else
539 removeAssumedBits(QUEUE_PTR);
540 }
541
542 if (funcRetrievesMultigridSyncArg(A, COV)) {
543 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
544 "multigrid_sync_arg needs implicitarg_ptr");
545 removeAssumedBits(MULTIGRID_SYNC_ARG);
546 }
547
548 if (funcRetrievesHostcallPtr(A, COV)) {
549 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
550 removeAssumedBits(HOSTCALL_PTR);
551 }
552
553 if (funcRetrievesHeapPtr(A, COV)) {
554 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
555 removeAssumedBits(HEAP_PTR);
556 }
557
558 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
559 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
560 removeAssumedBits(QUEUE_PTR);
561 }
562
563 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
564 removeAssumedBits(LDS_KERNEL_ID);
565 }
566
567 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
568 removeAssumedBits(DEFAULT_QUEUE);
569
570 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
571 removeAssumedBits(COMPLETION_ACTION);
572
573 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
574 removeAssumedBits(FLAT_SCRATCH_INIT);
575
576 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
577 : ChangeStatus::UNCHANGED;
578 }
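// Worked example: a kernel that only calls llvm.amdgcn.workitem.id.y has its
// assumed WORKITEM_ID_Y bit cleared above; bits that survive to the fixpoint
// end up known, and manifest() below adds the matching "amdgpu-no-*" string
// attribute for each of them.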
579
580 ChangeStatus manifest(Attributor &A) override {
581 SmallVector<Attribute, 8> AttrList;
582 LLVMContext &Ctx = getAssociatedFunction()->getContext();
583
584 for (auto Attr : ImplicitAttrs) {
585 if (isKnown(Attr.first))
586 AttrList.push_back(Attribute::get(Ctx, Attr.second));
587 }
588
589 return A.manifestAttrs(getIRPosition(), AttrList,
590 /* ForceReplace */ true);
591 }
592
593 const std::string getAsStr(Attributor *) const override {
594 std::string Str;
595 raw_string_ostream OS(Str);
596 OS << "AMDInfo[";
597 for (auto Attr : ImplicitAttrs)
598 if (isAssumed(Attr.first))
599 OS << ' ' << Attr.second;
600 OS << " ]";
601 return OS.str();
602 }
603
604 /// See AbstractAttribute::trackStatistics()
605 void trackStatistics() const override {}
606
607private:
608 bool checkForQueuePtr(Attributor &A) {
609 Function *F = getAssociatedFunction();
610 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
611
612 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
613
614 bool NeedsQueuePtr = false;
615
616 auto CheckAddrSpaceCasts = [&](Instruction &I) {
617 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
618 if (castRequiresQueuePtr(SrcAS)) {
619 NeedsQueuePtr = true;
620 return false;
621 }
622 return true;
623 };
624
625 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
626
627 // `checkForAllInstructions` is much cheaper than iterating over all
628 // instructions manually, so try it first.
629
630 // The queue pointer is not needed if aperture registers are present.
631 if (!HasApertureRegs) {
632 bool UsedAssumedInformation = false;
633 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
634 {Instruction::AddrSpaceCast},
635 UsedAssumedInformation);
636 }
637
638 // If we found that we need the queue pointer, nothing else to do.
639 if (NeedsQueuePtr)
640 return true;
641
642 if (!IsNonEntryFunc && HasApertureRegs)
643 return false;
644
645 for (BasicBlock &BB : *F) {
646 for (Instruction &I : BB) {
647 for (const Use &U : I.operands()) {
648 if (const auto *C = dyn_cast<Constant>(U)) {
649 if (InfoCache.needsQueuePtr(C, *F))
650 return true;
651 }
652 }
653 }
654 }
655
656 return false;
657 }
658
659 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
660 unsigned Pos = AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
661 AA::RangeTy Range(Pos, 8);
662 return funcRetrievesImplicitKernelArg(A, Range);
663 }
664
665 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
666 unsigned Pos = AMDGPU::getHostcallImplicitArgPosition(COV);
667 AA::RangeTy Range(Pos, 8);
668 return funcRetrievesImplicitKernelArg(A, Range);
669 }
670
671 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
672 unsigned Pos = AMDGPU::getDefaultQueueImplicitArgPosition(COV);
673 AA::RangeTy Range(Pos, 8);
674 return funcRetrievesImplicitKernelArg(A, Range);
675 }
676
677 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
678 unsigned Pos = AMDGPU::getCompletionActionImplicitArgPosition(COV);
679 AA::RangeTy Range(Pos, 8);
680 return funcRetrievesImplicitKernelArg(A, Range);
681 }
682
683 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
684 if (COV < 5)
685 return false;
686 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
687 return funcRetrievesImplicitKernelArg(A, Range);
688 }
689
690 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
691 if (COV < 5)
692 return false;
693 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
694 return funcRetrievesImplicitKernelArg(A, Range);
695 }
696
697 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
698 // Check if this is a call to the implicitarg_ptr builtin and whether it is
699 // used to retrieve the implicit kernel argument in Range (e.g. the hostcall
700 // pointer). The argument is unused only if every use of the implicitarg_ptr
701 // is a load that clearly does not retrieve any byte of that argument. We
702 // check this by tracing all the uses of the initial call to the
703 // implicitarg_ptr intrinsic.
704 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
705 auto &Call = cast<CallBase>(I);
706 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
707 return true;
708
709 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
710 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
711 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
712 return false;
713
714 return PointerInfoAA->forallInterferingAccesses(
715 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
716 return Acc.getRemoteInst()->isDroppable();
717 });
718 };
719
720 bool UsedAssumedInformation = false;
721 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
722 UsedAssumedInformation);
723 }
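// For instance, with the hostcall range an 8-byte load from implicitarg_ptr at
// that offset is a non-droppable interfering access, so the callback returns
// false and this function is treated as retrieving the hostcall pointer.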
724
725 bool funcRetrievesLDSKernelId(Attributor &A) {
726 auto DoesNotRetrieve = [&](Instruction &I) {
727 auto &Call = cast<CallBase>(I);
728 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
729 };
730 bool UsedAssumedInformation = false;
731 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
732 UsedAssumedInformation);
733 }
734
735 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
736 // not to be set.
737 bool needFlatScratchInit(Attributor &A) {
738 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
739
740 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
741 // there is a cast from PRIVATE_ADDRESS.
742 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
743 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
744 AMDGPUAS::PRIVATE_ADDRESS;
745 };
746
747 bool UsedAssumedInformation = false;
748 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
749 {Instruction::AddrSpaceCast},
750 UsedAssumedInformation))
751 return true;
752
753 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
754 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
755
756 Function *F = getAssociatedFunction();
757 for (Instruction &I : instructions(F)) {
758 for (const Use &U : I.operands()) {
759 if (const auto *C = dyn_cast<Constant>(U)) {
760 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
761 return true;
762 }
763 }
764 }
765
766 // Finally check callees.
767
768 // This is called on each call-like instruction; returning false means this
769 // function must not get "amdgpu-no-flat-scratch-init".
770 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
771 const auto &CB = cast<CallBase>(I);
772 const Function *Callee = CB.getCalledFunction();
773
774 // Callee == 0 for inline asm or indirect call with known callees.
775 // In the latter case, updateImpl() already checked the callees and we
776 // know their FLAT_SCRATCH_INIT bit is set.
777 // If function has indirect call with unknown callees, the bit is
778 // already removed in updateImpl() and execution won't reach here.
779 if (!Callee)
780 return true;
781
782 return Callee->getIntrinsicID() !=
783 Intrinsic::amdgcn_addrspacecast_nonnull;
784 };
785
786 UsedAssumedInformation = false;
787 // If any callee is false (i.e. need FlatScratchInit),
788 // checkForAllCallLikeInstructions returns false, in which case this
789 // function returns true.
790 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
791 UsedAssumedInformation);
792 }
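// In short: a cast from private to flat anywhere in the function (as an
// instruction, inside a constant expression, or conservatively any call to
// llvm.amdgcn.addrspacecast.nonnull) means flat scratch must be initialized and
// "amdgpu-no-flat-scratch-init" is withheld.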
793};
794
795AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
796 Attributor &A) {
797 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
798 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
799 llvm_unreachable("AAAMDAttributes is only valid for function position");
800}
801
802/// Base class to derive different size ranges.
803struct AAAMDSizeRangeAttribute
804 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
805 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
806
807 StringRef AttrName;
808
809 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
810 StringRef AttrName)
811 : Base(IRP, 32), AttrName(AttrName) {}
812
813 /// See AbstractAttribute::trackStatistics()
814 void trackStatistics() const override {}
815
816 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
817 ChangeStatus Change = ChangeStatus::UNCHANGED;
818
819 auto CheckCallSite = [&](AbstractCallSite CS) {
820 Function *Caller = CS.getInstruction()->getFunction();
821 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
822 << "->" << getAssociatedFunction()->getName() << '\n');
823
824 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
825 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
826 if (!CallerInfo || !CallerInfo->isValidState())
827 return false;
828
829 Change |=
830 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
831
832 return true;
833 };
834
835 bool AllCallSitesKnown = true;
836 if (!A.checkForAllCallSites(CheckCallSite, *this,
837 /*RequireAllCallSites=*/true,
838 AllCallSitesKnown))
839 return indicatePessimisticFixpoint();
840
841 return Change;
842 }
843
844 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
845 /// attribute if it is not the same as the default.
846 ChangeStatus
847 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
848 std::pair<unsigned, unsigned> Default) {
849 auto [Min, Max] = Default;
850 unsigned Lower = getAssumed().getLower().getZExtValue();
851 unsigned Upper = getAssumed().getUpper().getZExtValue();
852
853 // Clamp the range to the default value.
854 if (Lower < Min)
855 Lower = Min;
856 if (Upper > Max + 1)
857 Upper = Max + 1;
858
859 // No manifest if the value is invalid or same as default after clamp.
860 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
861 return ChangeStatus::UNCHANGED;
862
863 Function *F = getAssociatedFunction();
864 LLVMContext &Ctx = F->getContext();
865 SmallString<10> Buffer;
866 raw_svector_ostream OS(Buffer);
867 OS << Lower << ',' << Upper - 1;
868 return A.manifestAttrs(getIRPosition(),
869 {Attribute::get(Ctx, AttrName, OS.str())},
870 /*ForceReplace=*/true);
871 }
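// Example: for "amdgpu-flat-work-group-size", an assumed range of [64, 257)
// against a default of [1, 1024] manifests as "64,256", while a range equal to
// the default (or an invalid one) emits nothing.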
872
873 const std::string getAsStr(Attributor *) const override {
874 std::string Str;
875 raw_string_ostream OS(Str);
876 OS << getName() << '[';
877 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
878 OS << ']';
879 return OS.str();
880 }
881};
882
883/// Propagate amdgpu-flat-work-group-size attribute.
884struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
885 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
886 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
887
888 void initialize(Attributor &A) override {
889 Function *F = getAssociatedFunction();
890 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
891
892 bool HasAttr = false;
893 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
894 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
895
896 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
897 // Only honor the attribute if it is not the maximum range, because the
898 // front end unfortunately always emits the attribute, and sometimes it
899 // emits the maximum range.
900 if (*Attr != MaxRange) {
901 Range = *Attr;
902 HasAttr = true;
903 }
904 }
905
906 // We don't want to directly clamp the state if it's the max range because
907 // that is basically the worst state.
908 if (Range == MaxRange)
909 return;
910
911 auto [Min, Max] = Range;
912 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
913 IntegerRangeState IRS(CR);
914 clampStateAndIndicateChange(this->getState(), IRS);
915
916 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
917 indicateOptimisticFixpoint();
918 }
919
920 ChangeStatus updateImpl(Attributor &A) override {
921 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
922 }
923
924 /// Create an abstract attribute view for the position \p IRP.
925 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
926 Attributor &A);
927
928 ChangeStatus manifest(Attributor &A) override {
929 Function *F = getAssociatedFunction();
930 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
931 return emitAttributeIfNotDefaultAfterClamp(
932 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
933 }
934
935 /// See AbstractAttribute::getName()
936 const std::string getName() const override {
937 return "AAAMDFlatWorkGroupSize";
938 }
939
940 /// See AbstractAttribute::getIdAddr()
941 const char *getIdAddr() const override { return &ID; }
942
943 /// This function should return true if the type of the \p AA is
944 /// AAAMDFlatWorkGroupSize
945 static bool classof(const AbstractAttribute *AA) {
946 return (AA->getIdAddr() == &ID);
947 }
948
949 /// Unique ID (due to the unique address)
950 static const char ID;
951};
952
953const char AAAMDFlatWorkGroupSize::ID = 0;
954
955AAAMDFlatWorkGroupSize &
956AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
957 Attributor &A) {
958 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
959 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
960 llvm_unreachable(
961 "AAAMDFlatWorkGroupSize is only valid for function position");
962}
963
964struct TupleDecIntegerRangeState : public AbstractState {
965 DecIntegerState<uint32_t> X, Y, Z;
966
967 bool isValidState() const override {
968 return X.isValidState() && Y.isValidState() && Z.isValidState();
969 }
970
971 bool isAtFixpoint() const override {
972 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
973 }
974
975 ChangeStatus indicateOptimisticFixpoint() override {
976 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
977 Z.indicateOptimisticFixpoint();
978 }
979
980 ChangeStatus indicatePessimisticFixpoint() override {
981 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
982 Z.indicatePessimisticFixpoint();
983 }
984
985 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
986 X ^= Other.X;
987 Y ^= Other.Y;
988 Z ^= Other.Z;
989 return *this;
990 }
991
992 bool operator==(const TupleDecIntegerRangeState &Other) const {
993 return X == Other.X && Y == Other.Y && Z == Other.Z;
994 }
995
996 TupleDecIntegerRangeState &getAssumed() { return *this; }
997 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
998};
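// Each dimension is a DecIntegerState, so 0 is the best (tightest) bound and
// the ^= join moves toward the pessimistic side; merging caller states
// therefore keeps a per-dimension bound that covers every caller.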
999
1000using AAAMDMaxNumWorkgroupsState =
1001 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1002
1003/// Propagate amdgpu-max-num-workgroups attribute.
1004struct AAAMDMaxNumWorkgroups
1005 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1006 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1007
1008 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1009
1010 void initialize(Attributor &A) override {
1011 Function *F = getAssociatedFunction();
1012 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1013
1014 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1015
1016 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1017 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1018 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1019
1020 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1021 indicatePessimisticFixpoint();
1022 }
1023
1024 ChangeStatus updateImpl(Attributor &A) override {
1025 ChangeStatus Change = ChangeStatus::UNCHANGED;
1026
1027 auto CheckCallSite = [&](AbstractCallSite CS) {
1028 Function *Caller = CS.getInstruction()->getFunction();
1029 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1030 << "->" << getAssociatedFunction()->getName() << '\n');
1031
1032 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1033 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1034 if (!CallerInfo || !CallerInfo->isValidState())
1035 return false;
1036
1037 Change |=
1038 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1039 return true;
1040 };
1041
1042 bool AllCallSitesKnown = true;
1043 if (!A.checkForAllCallSites(CheckCallSite, *this,
1044 /*RequireAllCallSites=*/true,
1045 AllCallSitesKnown))
1046 return indicatePessimisticFixpoint();
1047
1048 return Change;
1049 }
1050
1051 /// Create an abstract attribute view for the position \p IRP.
1052 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1053 Attributor &A);
1054
1055 ChangeStatus manifest(Attributor &A) override {
1056 Function *F = getAssociatedFunction();
1057 LLVMContext &Ctx = F->getContext();
1058 SmallString<32> Buffer;
1059 raw_svector_ostream OS(Buffer);
1060 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1061
1062 // TODO: Should annotate loads of the group size for this to do anything
1063 // useful.
1064 return A.manifestAttrs(
1065 getIRPosition(),
1066 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1067 /* ForceReplace= */ true);
1068 }
1069
1070 const std::string getName() const override { return "AAAMDMaxNumWorkgroups"; }
1071
1072 const std::string getAsStr(Attributor *) const override {
1073 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1074 raw_string_ostream OS(Buffer);
1075 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1076 << ']';
1077 return OS.str();
1078 }
1079
1080 const char *getIdAddr() const override { return &ID; }
1081
1082 /// This function should return true if the type of the \p AA is
1083 /// AAAMDMaxNumWorkgroups
1084 static bool classof(const AbstractAttribute *AA) {
1085 return (AA->getIdAddr() == &ID);
1086 }
1087
1088 void trackStatistics() const override {}
1089
1090 /// Unique ID (due to the unique address)
1091 static const char ID;
1092};
1093
1094const char AAAMDMaxNumWorkgroups::ID = 0;
1095
1096AAAMDMaxNumWorkgroups &
1097AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1098 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1099 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1100 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1101}
1102
1103/// Propagate amdgpu-waves-per-eu attribute.
1104struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1105 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1106 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1107
1108 void initialize(Attributor &A) override {
1109 Function *F = getAssociatedFunction();
1110 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1111
1112 auto TakeRange = [&](std::pair<unsigned, unsigned> R) {
1113 auto [Min, Max] = R;
1114 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1115 IntegerRangeState RangeState(Range);
1116 clampStateAndIndicateChange(this->getState(), RangeState);
1117 indicateOptimisticFixpoint();
1118 };
1119
1120 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1121 1U, InfoCache.getMaxWavesPerEU(*F)};
1122
1123 // If the attribute exists, we will honor it if it is not the default.
1124 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1125 if (*Attr != MaxWavesPerEURange) {
1126 TakeRange(*Attr);
1127 return;
1128 }
1129 }
1130
1131 // Unlike AAAMDFlatWorkGroupSize, things are trickier here. Since the
1132 // calculation of waves per EU involves the flat work group size, we cannot
1133 // simply use an assumed flat work group size as a starting point, because
1134 // the update of the flat work group size moves in the opposite direction
1135 // from waves per EU. However, we can still do something for an entry
1136 // function: it is a terminal node, and its flat work group size (from the
1137 // attribute or the default) will be used anyway, so we can take that value
1138 // and compute the waves per EU from it. The result cannot be updated any
1139 // further, but it can still be propagated to callees.
1140 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1141 std::pair<unsigned, unsigned> FlatWorkGroupSize;
1142 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F))
1143 FlatWorkGroupSize = *Attr;
1144 else
1145 FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F);
1146 TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange,
1147 FlatWorkGroupSize));
1148 }
1149 }
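// Example: a kernel carrying "amdgpu-flat-work-group-size"="1,256" but no
// explicit waves-per-eu attribute gets its effective waves-per-EU computed from
// that group size and is immediately fixed at that value.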
1150
1151 ChangeStatus updateImpl(Attributor &A) override {
1152 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1153 ChangeStatus Change = ChangeStatus::UNCHANGED;
1154
1155 auto CheckCallSite = [&](AbstractCallSite CS) {
1156 Function *Caller = CS.getInstruction()->getFunction();
1157 Function *Func = getAssociatedFunction();
1158 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1159 << "->" << Func->getName() << '\n');
1160
1161 const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
1162 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1163 const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1164 *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
1165 if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
1166 !AssumedGroupSize->isValidState())
1167 return false;
1168
1169 unsigned Min, Max;
1170 std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
1171 *Caller,
1172 {CallerInfo->getAssumed().getLower().getZExtValue(),
1173 CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
1174 {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1175 AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
1176 ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
1177 IntegerRangeState CallerRangeState(CallerRange);
1178 Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
1179
1180 return true;
1181 };
1182
1183 bool AllCallSitesKnown = true;
1184 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1185 return indicatePessimisticFixpoint();
1186
1187 return Change;
1188 }
1189
1190 /// Create an abstract attribute view for the position \p IRP.
1191 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1192 Attributor &A);
1193
1194 ChangeStatus manifest(Attributor &A) override {
1195 Function *F = getAssociatedFunction();
1196 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1197 return emitAttributeIfNotDefaultAfterClamp(
1198 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1199 }
1200
1201 /// See AbstractAttribute::getName()
1202 const std::string getName() const override { return "AAAMDWavesPerEU"; }
1203
1204 /// See AbstractAttribute::getIdAddr()
1205 const char *getIdAddr() const override { return &ID; }
1206
1207 /// This function should return true if the type of the \p AA is
1208 /// AAAMDWavesPerEU
1209 static bool classof(const AbstractAttribute *AA) {
1210 return (AA->getIdAddr() == &ID);
1211 }
1212
1213 /// Unique ID (due to the unique address)
1214 static const char ID;
1215};
1216
1217const char AAAMDWavesPerEU::ID = 0;
1218
1219AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1220 Attributor &A) {
1221 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1222 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1223 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1224}
1225
1226static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
1227 for (const auto &CI : IA->ParseConstraints()) {
1228 for (StringRef Code : CI.Codes) {
1229 Code.consume_front("{");
1230 if (Code.starts_with("a"))
1231 return true;
1232 }
1233 }
1234
1235 return false;
1236}
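// Example: asm constraints such as "a" or "{a5}" count as AGPR uses, while
// constraints that only name "v" or "s" register classes do not.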
1237
1238struct AAAMDGPUNoAGPR
1239 : public IRAttribute<Attribute::NoUnwind,
1240 StateWrapper<BooleanState, AbstractAttribute>,
1241 AAAMDGPUNoAGPR> {
1242 AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
1243
1244 static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
1245 Attributor &A) {
1246 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1247 return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
1248 llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
1249 }
1250
1251 void initialize(Attributor &A) override {
1252 Function *F = getAssociatedFunction();
1253 if (F->hasFnAttribute("amdgpu-no-agpr"))
1254 indicateOptimisticFixpoint();
1255 }
1256
1257 const std::string getAsStr(Attributor *A) const override {
1258 return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
1259 }
1260
1261 void trackStatistics() const override {}
1262
1263 ChangeStatus updateImpl(Attributor &A) override {
1264 // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1265
1266 auto CheckForNoAGPRs = [&](Instruction &I) {
1267 const auto &CB = cast<CallBase>(I);
1268 const Value *CalleeOp = CB.getCalledOperand();
1269 const Function *Callee = dyn_cast<Function>(CalleeOp);
1270 if (!Callee) {
1271 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1272 return !inlineAsmUsesAGPRs(IA);
1273 return false;
1274 }
1275
1276 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1277 // required to use AGPRs.
1278 if (Callee->isIntrinsic())
1279 return true;
1280
1281 // TODO: Handle callsite attributes
1282 const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
1283 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
1284 return CalleeInfo && CalleeInfo->isValidState() &&
1285 CalleeInfo->getAssumed();
1286 };
1287
1288 bool UsedAssumedInformation = false;
1289 if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
1290 UsedAssumedInformation))
1291 return indicatePessimisticFixpoint();
1292 return ChangeStatus::UNCHANGED;
1293 }
1294
1295 ChangeStatus manifest(Attributor &A) override {
1296 if (!getAssumed())
1297 return ChangeStatus::UNCHANGED;
1298 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1299 return A.manifestAttrs(getIRPosition(),
1300 {Attribute::get(Ctx, "amdgpu-no-agpr")});
1301 }
1302
1303 const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
1304 const char *getIdAddr() const override { return &ID; }
1305
1306 /// This function should return true if the type of the \p AA is
1307 /// AAAMDGPUNoAGPRs
1308 static bool classof(const AbstractAttribute *AA) {
1309 return (AA->getIdAddr() == &ID);
1310 }
1311
1312 static const char ID;
1313};
1314
1315const char AAAMDGPUNoAGPR::ID = 0;
1316
1317static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
1318 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1319 for (unsigned I = 0;
1320 I < F.arg_size() &&
1321 I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
1322 ++I) {
1323 Argument &Arg = *F.getArg(I);
1324 // Check for incompatible attributes.
1325 if (Arg.hasByRefAttr() || Arg.hasNestAttr())
1326 break;
1327
1328 Arg.addAttr(Attribute::InReg);
1329 }
1330}
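// Example: with -amdgpu-kernarg-preload-count=2 the first two kernel arguments
// are marked inreg (bounded by the subtarget's user-SGPR limit), and the scan
// stops early at the first byref or nest argument.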
1331
1332static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1333 AMDGPUAttributorOptions Options) {
1334 SetVector<Function *> Functions;
1335 for (Function &F : M) {
1336 if (!F.isIntrinsic())
1337 Functions.insert(&F);
1338 }
1339
1340 CallGraphUpdater CGUpdater;
1341 BumpPtrAllocator Allocator;
1342 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1343 DenseSet<const char *> Allowed(
1344 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1345 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1346 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1347 &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1348 &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1349 &AAInstanceInfo::ID});
1350
1351 AttributorConfig AC(CGUpdater);
1352 AC.IsClosedWorldModule = Options.IsClosedWorld;
1353 AC.Allowed = &Allowed;
1354 AC.IsModulePass = true;
1355 AC.DefaultInitializeLiveInternals = false;
1356 AC.IndirectCalleeSpecializationCallback =
1357 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1358 Function &Callee, unsigned NumAssumedCallees) {
1359 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1360 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1361 };
1362 AC.IPOAmendableCB = [](const Function &F) {
1363 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1364 };
1365
1366 Attributor A(Functions, InfoCache, AC);
1367
1368 LLVM_DEBUG(dbgs() << "[AMDGPUAttributor] Module " << M.getName() << " is "
1369 << (AC.IsClosedWorldModule ? "" : "not ")
1370 << "assumed to be a closed world.\n");
1371
1372 for (auto *F : Functions) {
1373 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1374 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1375 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1376 A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
1377 CallingConv::ID CC = F->getCallingConv();
1378 if (!AMDGPU::isEntryFunctionCC(CC)) {
1379 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1380 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1381 } else if (CC == CallingConv::AMDGPU_KERNEL) {
1382 addPreloadKernArgHint(*F, TM);
1383 }
1384
1385 for (auto &I : instructions(F)) {
1386 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1387 A.getOrCreateAAFor<AAAddressSpace>(
1388 IRPosition::value(*LI->getPointerOperand()));
1389 } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1390 A.getOrCreateAAFor<AAAddressSpace>(
1391 IRPosition::value(*SI->getPointerOperand()));
1392 } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
1393 A.getOrCreateAAFor<AAAddressSpace>(
1394 IRPosition::value(*RMW->getPointerOperand()));
1395 } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
1396 A.getOrCreateAAFor<AAAddressSpace>(
1397 IRPosition::value(*CmpX->getPointerOperand()));
1398 }
1399 }
1400 }
1401
1402 ChangeStatus Change = A.run();
1403 return Change == ChangeStatus::CHANGED;
1404}
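// runImpl seeds one AA of each kind above for every non-intrinsic function
// (plus AAAddressSpace for the pointer operand of each load, store, and atomic)
// and reports whether the fixpoint iteration changed the module.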
1405
1406class AMDGPUAttributorLegacy : public ModulePass {
1407public:
1408 AMDGPUAttributorLegacy() : ModulePass(ID) {}
1409
1410 /// doInitialization - Virtual method overridden by subclasses to do
1411 /// any necessary initialization before any pass is run.
1412 bool doInitialization(Module &) override {
1413 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
1414 if (!TPC)
1415 report_fatal_error("TargetMachine is required");
1416
1417 TM = &TPC->getTM<TargetMachine>();
1418 return false;
1419 }
1420
1421 bool runOnModule(Module &M) override {
1422 AnalysisGetter AG(this);
1423 return runImpl(M, AG, *TM, /*Options=*/{});
1424 }
1425
1426 void getAnalysisUsage(AnalysisUsage &AU) const override {
1427 AU.addRequired<CycleInfoWrapperPass>();
1428 }
1429
1430 StringRef getPassName() const override { return "AMDGPU Attributor"; }
1431 TargetMachine *TM;
1432 static char ID;
1433};
1434} // namespace
1435
1436PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
1437 ModuleAnalysisManager &AM) {
1438
1439 FunctionAnalysisManager &FAM =
1440 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1441 AnalysisGetter AG(FAM);
1442
1443 // TODO: Probably preserves CFG
1444 return runImpl(M, AG, TM, Options) ? PreservedAnalyses::none()
1445 : PreservedAnalyses::all();
1446}
1447
1448char AMDGPUAttributorLegacy::ID = 0;
1449
1450Pass *llvm::createAMDGPUAttributorLegacyPass() {
1451 return new AMDGPUAttributorLegacy();
1452}
1453INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
1454 false, false)
1455INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass)
1456INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
1457 false, false)