doxygen/AMDGPUTargetTransformInfo_8h_source.html

//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// This file declares a TargetTransformInfo::Concept conforming object specific to the

/// AMDGPU target machine. It uses the target's detailed information to

/// provide more precise answers to certain TTI queries, while letting the

/// target independent and default TTI implementations handle the rest.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H


#include "AMDGPU.h"

#include "llvm/CodeGen/BasicTTIImpl.h"

#include "llvm/Support/AMDGPUAddrSpace.h"

#include <optional>


namespace llvm {


class AMDGPUTargetMachine;

class GCNSubtarget;

class InstCombiner;

class Loop;

class ScalarEvolution;

class SITargetLowering;

class Type;

class Value;


/// TTI implementation that only holds the generic TargetSubtargetInfo and
/// TargetLoweringBase pointers, plus the target triple. It derives from
/// BasicTTIImplBase, passing itself as the template argument.
///
/// Only declarations are visible here for the constructor and the three
/// query hooks; their definitions live out of line.
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {

  // Short aliases used throughout the class body.
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;

  using TTI = TargetTransformInfo;


  // NOTE(review): presumably lets the base template call the private
  // getST()/getTLI() accessors below -- confirm against BasicTTIImplBase.
  friend BaseT;


  Triple TargetTriple;


  // Non-owning pointers; initialised by the out-of-line constructor.
  const TargetSubtargetInfo *ST;

  const TargetLoweringBase *TLI;


  /// \return the stored subtarget pointer.
  const TargetSubtargetInfo *getST() const { return ST; }

  /// \return the stored target-lowering pointer.
  const TargetLoweringBase *getTLI() const { return TLI; }


public:

  /// Construct from the AMDGPU target machine \p TM and the function \p F
  /// (defined out of line).
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);


  /// Fill in \p UP for loop \p L (defined out of line).
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

                               TTI::UnrollingPreferences &UP,

                               OptimizationRemarkEmitter *ORE);


  /// Fill in \p PP for loop \p L (defined out of line).
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,

                             TTI::PeelingPreferences &PP);


  /// Defined out of line; the threshold value is not visible in this header.
  int64_t getMaxMemIntrinsicInlineSizeThreshold() const;

};


/// TTI implementation for GCN subtargets. It derives from BasicTTIImplBase
/// (passing itself as the template argument), keeps typed GCNSubtarget /
/// SITargetLowering pointers, and embeds an AMDGPUTTIImpl as CommonTTI.
///
/// Most hooks are only declared here and defined out of line; the few that
/// are defined inline are documented at their definition.
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  // Non-owning pointers and cached flags; initialised by the out-of-line
  // constructor. Declaration order is kept as-is because it determines the
  // member initialisation order.
  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;

  /// Value reported by getInlinerVectorBonusPercent(). Declared as int to
  /// match that getter's return type: a bool could only ever represent 0 or
  /// 1, so any other percentage would silently collapse to 1.
  static constexpr int InlinerVectorBonusPercent = 0;

  static const FeatureBitset InlineFeatureIgnoreList;

  /// \return the stored GCN subtarget pointer.
  const GCNSubtarget *getST() const { return ST; }

  /// \return the stored SI target-lowering pointer.
  const SITargetLowering *getTLI() const { return TLI; }

  /// Cost of a full-rate instruction: TCC_Basic.
  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  /// Cost of a half-rate instruction: 2 when costing for code size,
  /// otherwise twice TCC_Basic.
  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  /// Cost of a quarter-rate instruction: 2 when costing for code size,
  /// otherwise four times TCC_Basic.
  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;

public:
  /// Construct from the AMDGPU target machine \p TM and the function \p F
  /// (defined out of line).
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence(const Function *F = nullptr) const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  /// Always reports TTI::PSK_FastHardware. \p TyWidth is only checked (in
  /// asserts-enabled builds) to be a power of two.
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;

  int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            Align SrcAlign, Align DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const;

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const;
  unsigned getMaxInterleaveFactor(ElementCount VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  /// Analyze if the results of inline asm are divergent.
  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  // Keep the base-class overloads visible next to the overload declared here.
  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  /// \return true if an address space cast from \p FromAS to \p ToAS is
  /// accepted by the rules below:
  ///  - identical address spaces are never a valid cast;
  ///  - FLAT may be cast to any extended-global space, LOCAL or PRIVATE;
  ///  - an extended-global space may be cast to any flat-global space or to
  ///    CONSTANT_ADDRESS_32BIT;
  ///  - LOCAL and PRIVATE may only be cast to FLAT;
  ///  - everything else is rejected.
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    // Address space casts must cast between different address spaces.
    if (FromAS == ToAS)
      return false;

    if (FromAS == AMDGPUAS::FLAT_ADDRESS)
      return AMDGPU::isExtendedGlobalAddrSpace(ToAS) ||
             ToAS == AMDGPUAS::LOCAL_ADDRESS ||
             ToAS == AMDGPUAS::PRIVATE_ADDRESS;

    if (AMDGPU::isExtendedGlobalAddrSpace(FromAS))
      return AMDGPU::isFlatGlobalAddrSpace(ToAS) ||
             ToAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;

    if (FromAS == AMDGPUAS::LOCAL_ADDRESS ||
        FromAS == AMDGPUAS::PRIVATE_ADDRESS)
      return ToAS == AMDGPUAS::FLAT_ADDRESS;

    return false;
  }

  /// Forwards to AMDGPU::addrspacesMayAlias.
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return AMDGPU::addrspacesMayAlias(AS0, AS1);
  }

  /// \return AMDGPUAS::FLAT_ADDRESS, or the all-ones value when IsGraphics
  /// is set.
  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphics)
      // Same value the previous `return -1;` produced after conversion to
      // unsigned, spelled explicitly.
      // NOTE(review): presumably the "no flat address space" sentinel that
      // InferAddressSpaces checks for -- confirm against the generic TTI docs.
      return ~0u;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  /// \return true unless \p AS is the LOCAL, REGION or PRIVATE address space.
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
                                 const Value *Op1, InstCombiner &IC) const;

  /// Simplify a lane index operand of \p II; \p LaneArgIdx selects which
  /// argument is the lane operand (defined out of line).
  bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II,
                                   unsigned LaneArgIdx) const;

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// Vector splitting is costed as free (always 0).
  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = {},
                                 const Instruction *CxtI = nullptr);

  /// Whether it is profitable to sink the operands of an Instruction \p I to
  /// the basic block of \p I.
  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const;

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  int getInliningLastCallToStaticBonus() const;
  /// Fixed inlining threshold multiplier of 11.
  unsigned getInliningThresholdMultiplier() const { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;
  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;

  /// \return the class-wide InlinerVectorBonusPercent constant (currently 0).
  int getInlinerVectorBonusPercent() const { return InlinerVectorBonusPercent; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
  unsigned getCacheLineSize() const override { return 128; }

  /// How much before a load we should place the prefetch instruction.
  /// This is currently measured in number of IR instructions.
  unsigned getPrefetchDistance() const override;

  /// \return true if the target wants to issue a prefetch in address space
  /// \p AS.
  bool shouldPrefetchAddressSpace(unsigned AS) const override;
};


} // end namespace llvm


#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

AMDGPUAddrSpace.h
AMDGPU address space definition.

AMDGPU.h

BasicTTIImpl.h
This file provides a helper that implements much of the TTI interface in terms of the target-independ...

Type
RelocType Type
Definition: COFFYAML.cpp:410

Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27

CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

Index
uint32_t Index
Definition: ELFObjHandler.cpp:83

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

InstCombiner
Machine InstCombiner
Definition: MachineCombiner.cpp:134

II
uint64_t IntrinsicInst * II
Definition: NVVMIntrRange.cpp:51

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

llvm::AMDGPUTTIImpl
Definition: AMDGPUTargetTransformInfo.h:36

llvm::AMDGPUTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: AMDGPUTargetTransformInfo.cpp:272

llvm::AMDGPUTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: AMDGPUTargetTransformInfo.cpp:110

llvm::AMDGPUTTIImpl::getMaxMemIntrinsicInlineSizeThreshold
int64_t getMaxMemIntrinsicInlineSizeThreshold() const
Definition: AMDGPUTargetTransformInfo.cpp:277

llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:31

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:63

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::BasicTTIImplBase
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:80

llvm::BasicTTIImplBase< GCNTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
Definition: BasicTTIImpl.h:1346

llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112

llvm::ElementCount
Definition: TypeSize.h:300

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20

llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:41

llvm::Function
Definition: Function.h:63

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::GCNTTIImpl
Definition: AMDGPUTargetTransformInfo.h:63

llvm::GCNTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const
Definition: AMDGPUTargetTransformInfo.cpp:328

llvm::GCNTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: AMDGPUTargetTransformInfo.cpp:727

llvm::GCNTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
Definition: AMDGPUTargetTransformInfo.cpp:859

llvm::GCNTTIImpl::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: AMDGPUTargetTransformInfo.cpp:992

llvm::GCNTTIImpl::simplifyDemandedLaneMaskArg
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
Definition: AMDGPUInstCombineIntrinsic.cpp:456

llvm::GCNTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: AMDGPUTargetTransformInfo.cpp:1400

llvm::GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold
int64_t getMaxMemIntrinsicInlineSizeThreshold() const
Definition: AMDGPUTargetTransformInfo.cpp:415

llvm::GCNTTIImpl::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: AMDGPUTargetTransformInfo.cpp:403

llvm::GCNTTIImpl::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Definition: AMDGPUTargetTransformInfo.cpp:845

llvm::GCNTTIImpl::isInlineAsmSourceOfDivergence
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
Definition: AMDGPUTargetTransformInfo.cpp:891

llvm::GCNTTIImpl::addrspacesMayAlias
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Definition: AMDGPUTargetTransformInfo.h:198

llvm::GCNTTIImpl::isReadRegisterSourceOfDivergence
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const
Definition: AMDGPUTargetTransformInfo.cpp:927

llvm::GCNTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: AMDGPUTargetTransformInfo.h:116

llvm::GCNTTIImpl::getInliningLastCallToStaticBonus
int getInliningLastCallToStaticBonus() const
Definition: AMDGPUTargetTransformInfo.cpp:1337

llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: AMDGPUInstCombineIntrinsic.cpp:1538

llvm::GCNTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned RCID) const
Definition: AMDGPUTargetTransformInfo.cpp:316

llvm::GCNTTIImpl::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Definition: AMDGPUTargetTransformInfo.h:213

llvm::GCNTTIImpl::isLegalToVectorizeMemChain
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: AMDGPUTargetTransformInfo.cpp:390

llvm::GCNTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
Definition: AMDGPUTargetTransformInfo.h:268

llvm::GCNTTIImpl::isValidAddrSpaceCast
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: AMDGPUTargetTransformInfo.h:177

llvm::GCNTTIImpl::shouldPrefetchAddressSpace
bool shouldPrefetchAddressSpace(unsigned AS) const override
Definition: AMDGPUTargetTransformInfo.cpp:1430

llvm::GCNTTIImpl::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: AMDGPUTargetTransformInfo.cpp:363

llvm::GCNTTIImpl::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: AMDGPUTargetTransformInfo.cpp:409

llvm::GCNTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(ElementCount VF)
Definition: AMDGPUTargetTransformInfo.cpp:511

llvm::GCNTTIImpl::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: AMDGPUTargetTransformInfo.h:251

llvm::GCNTTIImpl::isProfitableToSinkOperands
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
Definition: AMDGPUTargetTransformInfo.cpp:1209

llvm::GCNTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: AMDGPUTargetTransformInfo.cpp:1394

llvm::GCNTTIImpl::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Definition: AMDGPUTargetTransformInfo.h:202

llvm::GCNTTIImpl::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: AMDGPUTargetTransformInfo.h:255

llvm::GCNTTIImpl::getVectorSplitCost
InstructionCost getVectorSplitCost()
Definition: AMDGPUTargetTransformInfo.h:235

llvm::GCNTTIImpl::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: AMDGPUTargetTransformInfo.cpp:340

llvm::GCNTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: AMDGPUTargetTransformInfo.cpp:520

llvm::GCNTTIImpl::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: AMDGPUTargetTransformInfo.cpp:352

llvm::GCNTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
How much before a load we should place the prefetch instruction.
Definition: AMDGPUTargetTransformInfo.cpp:1426

llvm::GCNTTIImpl::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Definition: AMDGPUTargetTransformInfo.cpp:1077

llvm::GCNTTIImpl::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: AMDGPUTargetTransformInfo.cpp:344

llvm::GCNTTIImpl::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Definition: AMDGPUTargetTransformInfo.cpp:1063

llvm::GCNTTIImpl::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: AMDGPUTargetTransformInfo.cpp:1342

llvm::GCNTTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: AMDGPUTargetTransformInfo.cpp:1225

llvm::GCNTTIImpl::getCallerAllocaCost
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
Definition: AMDGPUTargetTransformInfo.cpp:1353

llvm::GCNTTIImpl::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const
Definition: AMDGPUTargetTransformInfo.cpp:421

llvm::GCNTTIImpl::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const
Definition: AMDGPUTargetTransformInfo.cpp:465

llvm::GCNTTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: AMDGPUTargetTransformInfo.cpp:796

llvm::GCNTTIImpl::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: AMDGPUTargetTransformInfo.cpp:373

llvm::GCNTTIImpl::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Definition: AMDGPUTargetTransformInfo.cpp:950

llvm::GCNTTIImpl::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: AMDGPUInstCombineIntrinsic.cpp:485

llvm::GCNTTIImpl::canSimplifyLegacyMulToMul
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition: AMDGPUInstCombineIntrinsic.cpp:330

llvm::GCNTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
Definition: AMDGPUTargetTransformInfo.cpp:546

llvm::GCNTTIImpl::hasBranchDivergence
bool hasBranchDivergence(const Function *F=nullptr) const
Definition: AMDGPUTargetTransformInfo.cpp:312

llvm::GCNTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
Definition: AMDGPUTargetTransformInfo.cpp:1146

llvm::GCNTTIImpl::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: AMDGPUTargetTransformInfo.cpp:827

llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:48

llvm::InstructionCost
Definition: InstructionCost.h:29

llvm::Instruction
Definition: Instruction.h:68

llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:119

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:32

llvm::SITargetLowering
Definition: SIISelLowering.h:31

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:447

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573

llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition: TargetLowering.h:195

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:63

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:212

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:263

llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:266

llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:1122

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:1175

llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:714

llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:714

llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:290

llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:1093

llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:1114

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44

llvm::TypeSize
Definition: TypeSize.h:334

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427

llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPUAddrSpace.h:38

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPUAddrSpace.h:35

llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPUAddrSpace.h:30

llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPUAddrSpace.h:36

llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:395

llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPUAddrSpace.h:86

llvm::AMDGPU::addrspacesMayAlias
static bool addrspacesMayAlias(unsigned AS1, unsigned AS2)
Definition: AMDGPU.h:475

llvm::AMDGPU::isExtendedGlobalAddrSpace
bool isExtendedGlobalAddrSpace(unsigned AS)
Definition: AMDGPUAddrSpace.h:91

llvm::Intrinsic::ID
unsigned ID
Definition: GenericSSAContext.h:28

llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:29

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Length
@ Length
Definition: DWP.cpp:480

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293

llvm::VFParamKind::Vector
@ Vector

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:71

llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:1130

llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:658

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:536