doxygen/NVPTXTargetTransformInfo_8h_source.html

//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

/// \file

/// This file defines a TargetTransformInfo::Concept conforming object specific

/// to the NVPTX target machine. It uses the target's detailed information to

/// provide more precise answers to certain TTI queries, while letting the

/// target independent and default TTI implementations handle the rest.

///

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H


#include "NVPTXTargetMachine.h"

#include "MCTargetDesc/NVPTXBaseInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/CodeGen/BasicTTIImpl.h"

#include "llvm/CodeGen/TargetLowering.h"

#include <optional>


namespace llvm {


/// NVPTX flavour of the TargetTransformInfo implementation.
///
/// The class plugs into BasicTTIImplBase through CRTP; any hook that is not
/// declared here is inherited from that base.
class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
  using BaseT = BasicTTIImplBase<NVPTXTTIImpl>;
  using TTI = TargetTransformInfo;

  // BaseT is a friend so that it can reach the private accessors below.
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  /// \returns the subtarget captured at construction time.
  const NVPTXSubtarget *getST() const { return ST; }

  /// \returns the target lowering object obtained from the subtarget.
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  /// Builds the TTI implementation for \p F using the data layout of the
  /// module that contains \p F and the subtarget provided by \p TM.
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

  /// Branch divergence is unconditionally reported for this target; \p F is
  /// not consulted.
  bool hasBranchDivergence(const Function *F = nullptr) { return true; }

  /// Divergence query for a single value \p V. Defined out of line.
  bool isSourceOfDivergence(const Value *V);

  /// \returns the generic address space, which serves as the flat one.
  unsigned getFlatAddressSpace() const {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

  /// \returns false for the shared, local and param address spaces and true
  /// for every other value of \p AS.
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    const bool IsExcluded = AS == AddressSpace::ADDRESS_SPACE_SHARED ||
                            AS == AddressSpace::ADDRESS_SPACE_LOCAL ||
                            AS == AddressSpace::ADDRESS_SPACE_PARAM;
    return !IsExcluded;
  }

  /// Target hook for InstCombine on intrinsic calls. Defined out of line.
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  /// A load chain may be vectorized when \p Alignment is at least as large as
  /// the \p ChainSizeInBytes access that vectorization would produce.
  /// \p AddrSpace is not taken into account.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return Alignment >= ChainSizeInBytes;
  }

  /// Store chains follow exactly the same rule as load chains.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  /// PTX exposes an unlimited number of registers of every kind, whereas the
  /// real hardware does not. Returning 1 is the conservative choice: it is
  /// just enough to keep the vectorizers enabled while switching off the
  /// heuristics that are driven by the register count.
  /// FIXME: Return a more reasonable number, while keeping an eye on
  /// LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(bool Vector) const { return 1; }

  /// Only <2 x half> should be vectorized, hence the vector register size is
  /// always reported as 32 bits, whatever \p K is.
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  /// \returns 32, matching the width reported by getRegisterBitWidth().
  unsigned getMinVectorRegisterBitWidth() const { return 32; }

  /// Inlining must not be blocked by the target-cpu and -features attributes
  /// that newer versions of LLVM/Clang attach to functions: PTX has no
  /// incompatible functions, and ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return true;
  }

  /// Calls are particularly expensive in NVPTX, so the inlining cost
  /// threshold is scaled up by a factor of 11.
  unsigned getInliningThresholdMultiplier() const { return 11; }

  /// Cost of an arithmetic instruction with opcode \p Opcode on type \p Ty.
  /// Defined out of line.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);

  /// Fills in the unrolling preferences \p UP for loop \p L. Defined out of
  /// line.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  /// Fills in the peeling preferences \p PP for loop \p L. Defined out of
  /// line.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  /// \returns true when \p I is a load or a store and \p AddrSpace is one of
  /// the address spaces in which volatile accesses are available.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores exist only for the shared and global address
    // spaces, or for the generic address space that maps to them.
    const bool SpaceAllowsVolatile =
        AddrSpace == AddressSpace::ADDRESS_SPACE_GENERIC ||
        AddrSpace == AddressSpace::ADDRESS_SPACE_GLOBAL ||
        AddrSpace == AddressSpace::ADDRESS_SPACE_SHARED;
    if (!SpaceAllowsVolatile)
      return false;

    // The instruction is inspected only after the address space is accepted.
    const unsigned Opc = I->getOpcode();
    return Opc == Instruction::Load || Opc == Instruction::Store;
  }
};


} // end namespace llvm


#endif

getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:849

BasicTTIImpl.h
This file provides a helper that implements much of the TTI interface in terms of the target-independ...

CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

NVPTXBaseInfo.h

NVPTXTargetMachine.h

TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

llvm::BasicTTIImplBase
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:80

llvm::Function
Definition: Function.h:62

llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:47

llvm::InstructionCost
Definition: InstructionCost.h:29

llvm::Instruction
Definition: Instruction.h:49

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47

llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31

llvm::NVPTXTTIImpl
Definition: NVPTXTargetTransformInfo.h:28

llvm::NVPTXTTIImpl::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: NVPTXTargetTransformInfo.h:66

llvm::NVPTXTTIImpl::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Definition: NVPTXTargetTransformInfo.h:52

llvm::NVPTXTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: NVPTXTargetTransformInfo.cpp:427

llvm::NVPTXTTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: NVPTXTargetTransformInfo.h:88

llvm::NVPTXTTIImpl::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: NVPTXTargetTransformInfo.h:62

llvm::NVPTXTTIImpl::hasBranchDivergence
bool hasBranchDivergence(const Function *F=nullptr)
Definition: NVPTXTargetTransformInfo.h:44

llvm::NVPTXTTIImpl::NVPTXTTIImpl
NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
Definition: NVPTXTargetTransformInfo.h:40

llvm::NVPTXTTIImpl::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: NVPTXTargetTransformInfo.cpp:390

llvm::NVPTXTTIImpl::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: NVPTXTargetTransformInfo.h:83

llvm::NVPTXTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: NVPTXTargetTransformInfo.cpp:440

llvm::NVPTXTTIImpl::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Definition: NVPTXTargetTransformInfo.h:48

llvm::NVPTXTTIImpl::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)
Definition: NVPTXTargetTransformInfo.h:111

llvm::NVPTXTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
Definition: NVPTXTargetTransformInfo.cpp:397

llvm::NVPTXTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(bool Vector) const
Definition: NVPTXTargetTransformInfo.h:76

llvm::NVPTXTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: NVPTXTargetTransformInfo.h:80

llvm::NVPTXTTIImpl::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: NVPTXTargetTransformInfo.h:95

llvm::NVPTXTTIImpl::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V)
Definition: NVPTXTargetTransformInfo.cpp:72

llvm::NVPTXTargetLowering
Definition: NVPTXISelLowering.h:451

llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:25

llvm::TargetTransformInfoImplBase::getDataLayout
const DataLayout & getDataLayout() const
Definition: TargetTransformInfoImpl.h:47

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:213

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:258

llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:1076

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:1125

llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:1068

llvm::TypeSize
Definition: TypeSize.h:319

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:395

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::ADDRESS_SPACE_GENERIC
@ ADDRESS_SPACE_GENERIC
Definition: NVPTXBaseInfo.h:22

llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26

llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23

llvm::ADDRESS_SPACE_PARAM
@ ADDRESS_SPACE_PARAM
Definition: NVPTXBaseInfo.h:29

llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24

llvm::VFParamKind::Vector
@ Vector

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:1084