LLVM 20.0.0git
AMDGPUTargetTransformInfo.h
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include <optional>

namespace llvm {

class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  static constexpr bool InlinerVectorBonusPercent = 0;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence(const Function *F = nullptr) const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
123 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
124 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
125 unsigned ChainSizeInBytes,
126 VectorType *VecTy) const;
127 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
128 unsigned ChainSizeInBytes,
129 VectorType *VecTy) const;
130 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
131
132 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
133 unsigned AddrSpace) const;
134 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
135 unsigned AddrSpace) const;
136 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
137 unsigned AddrSpace) const;
138
  int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            Align SrcAlign, Align DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const;

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const;
  unsigned getMaxInterleaveFactor(ElementCount VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    if (ToAS == AMDGPUAS::FLAT_ADDRESS) {
      switch (FromAS) {
      case AMDGPUAS::GLOBAL_ADDRESS:
      case AMDGPUAS::CONSTANT_ADDRESS:
      case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
      case AMDGPUAS::LOCAL_ADDRESS:
      case AMDGPUAS::PRIVATE_ADDRESS:
        return true;
      default:
        break;
      }
      return false;
    }
    if ((FromAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
         ToAS == AMDGPUAS::CONSTANT_ADDRESS) ||
        (FromAS == AMDGPUAS::CONSTANT_ADDRESS &&
         ToAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT))
      return true;
    return false;
  }

  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return AMDGPU::addrspacesMayAlias(AS0, AS1);
  }

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
                                 const Value *Op1, InstCombiner &IC) const;
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt,
                                 const Instruction *CxtI = nullptr);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() const { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;
  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;

  int getInlinerVectorBonusPercent() const { return InlinerVectorBonusPercent; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
  unsigned getCacheLineSize() const override { return 128; }

  /// How much before a load we should place the prefetch instruction.
  /// This is currently measured in number of IR instructions.
  unsigned getPrefetchDistance() const override;

  /// \return true if the target wants to issue a prefetch in address space
  /// \p AS.
  bool shouldPrefetchAddressSpace(unsigned AS) const override;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
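The declarations above are only reached through the generic TargetTransformInfo facade; passes never name GCNTTIImpl directly. As a minimal sketch of how that happens, the hypothetical function pass below (the name QueryAMDGPUTTIExample and its body are illustrative, not part of LLVM or of this header) obtains TTI via TargetIRAnalysis and issues a few of the queries declared here, assuming it runs on code compiled for an AMDGPU target.

// Hypothetical example only; not part of AMDGPUTargetTransformInfo.h.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct QueryAMDGPUTTIExample : PassInfoMixin<QueryAMDGPUTTIExample> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    // TargetIRAnalysis dispatches to the target's TTI implementation; for
    // AMDGPU functions that is the GCNTTIImpl declared in this header.
    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);

    // Divergence-aware transforms gate on this query; the AMDGPU
    // implementation generally reports branch divergence.
    if (!TTI.hasBranchDivergence(&F))
      return PreservedAnalyses::all();

    // Per getFlatAddressSpace() above, graphics shaders report no flat
    // address space (-1), telling InferAddressSpaces there is nothing to do.
    unsigned FlatAS = TTI.getFlatAddressSpace();
    (void)FlatAS;

    // Count values the target considers divergent, e.g. to decide whether a
    // uniformity-dependent rewrite is worthwhile.
    unsigned DivergentValues = 0;
    for (Instruction &I : instructions(F))
      if (TTI.isSourceOfDivergence(&I))
        ++DivergentValues;
    (void)DivergentValues;

    return PreservedAnalyses::all();
  }
};

Nothing in the sketch includes AMDGPU-specific headers; the same pass body simply behaves conservatively on targets whose TTI reports no branch divergence.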