docs/doxygen/R600TargetTransformInfo_8cpp_source.html

//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// \file

// This file implements a TargetTransformInfo analysis pass specific to the

// R600 target machine. It uses the target's detailed information to provide

// more precise answers to certain TTI queries, while letting the target

// independent and default TTI implementations handle the rest.

//

//===----------------------------------------------------------------------===//


#include "R600TargetTransformInfo.h"

#include "AMDGPU.h"

#include "AMDGPUTargetMachine.h"

#include "R600Subtarget.h"


using namespace llvm;


#define DEBUG_TYPE "R600tti"


// Builds the R600 TTI implementation for function F.
//
// The base class is initialized from TM and F's data layout; ST is the
// subtarget TM reports for F; TLI is that subtarget's target lowering; and
// CommonTTI is constructed from the same TM and F so that shared queries can
// be delegated to it.
R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)

    : BaseT(TM, F.getDataLayout()),

      // NOTE(review): the static_cast assumes the subtarget returned for F is
      // an R600Subtarget; nothing in this constructor verifies that.
      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),

      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}


// Returns the register count reported for the hardware. The Vec flag is
// accepted for interface compatibility but does not affect the result.
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
  // XXX - 4 channels. Should these count as vector instead?
  const unsigned Channels = 4;
  return Channels * 128;
}


// Returns the number of registers for the given register class by forwarding
// to getHardwareNumberOfRegisters. Class ID 1 is passed on as Vec = true;
// every other class ID is passed on as Vec = false.
unsigned R600TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  return getHardwareNumberOfRegisters(/*Vec=*/ClassID == 1);
}


// Returns the register width in bits as a fixed (non-scalable) TypeSize. The
// answer is 32 regardless of the register kind K that is queried.
TypeSize
R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  const unsigned WidthInBits = 32;
  return TypeSize::getFixed(WidthInBits);
}


// Returns the minimum vector register width in bits, which is always 32.
unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
  const unsigned MinWidthInBits = 32;
  return MinWidthInBits;
}


// Returns the load/store vector register width, in bits, for the given
// address space:
//   - global and constant:                          128
//   - local and region:                              64
//   - private:                                       32
//   - param D/I and constant buffers 0 through 15:  128
// Any other address space is treated as unreachable.
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  const bool IsGlobalOrConstant = AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
                                  AddrSpace == AMDGPUAS::CONSTANT_ADDRESS;
  const bool IsLocalOrRegion = AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
                               AddrSpace == AMDGPUAS::REGION_ADDRESS;
  const bool IsPrivate = AddrSpace == AMDGPUAS::PRIVATE_ADDRESS;
  const bool IsParam = AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
                       AddrSpace == AMDGPUAS::PARAM_I_ADDRESS;
  const bool IsConstantBuffer = AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
                                AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15;

  // The checks are made in a fixed order so the first matching category wins.
  if (IsGlobalOrConstant)
    return 128;
  if (IsLocalOrRegion)
    return 64;
  if (IsPrivate)
    return 32;
  if (IsParam || IsConstantBuffer)
    return 128;

  llvm_unreachable("unhandled address space");
}


// Decides whether a chain of memory accesses may be vectorized. Only the
// address space is consulted: chains in the private address space are
// rejected, everything else is accepted. ChainSizeInBytes and Alignment are
// not inspected.
bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                             Align Alignment,
                                             unsigned AddrSpace) const {
  // Flat stores are allowed to vectorize even though they may later need to
  // be decomposed if they can touch private memory. There is not enough
  // context to decide that here, and legalization can handle it.
  const bool IsPrivate = AddrSpace == AMDGPUAS::PRIVATE_ADDRESS;
  return !IsPrivate;
}


// Load chains follow the same legality rule as any other memory chain, so
// the query is answered by isLegalToVectorizeMemChain with the arguments
// passed through unchanged.
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                              Align Alignment,
                                              unsigned AddrSpace) const {
  const bool IsLegal =
      isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
  return IsLegal;
}


// Store chains follow the same legality rule as any other memory chain, so
// the query is answered by isLegalToVectorizeMemChain with the arguments
// passed through unchanged.
bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                               Align Alignment,
                                               unsigned AddrSpace) const {
  const bool IsLegal =
      isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
  return IsLegal;
}


// Returns the maximum interleave factor for vectorization factor VF: 1 when
// VF is scalar, 8 otherwise.
unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) const {
  // A scalar VF means the loop is not vectorized; unrolling is disabled in
  // that case.
  // TODO: Enable this again.
  return VF.isScalar() ? 1 : 8;
}


// Returns the cost of a control-flow instruction.
//
// For the code-size and size-and-latency cost kinds, a PHI costs 0 and every
// other opcode costs 1. For the remaining cost kinds, branches and returns
// cost 10 and all other opcodes defer to the base implementation.
InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) const {
  const bool IsSizeBasedKind = CostKind == TTI::TCK_CodeSize ||
                               CostKind == TTI::TCK_SizeAndLatency;
  if (IsSizeBasedKind) {
    if (Opcode == Instruction::PHI)
      return 0;
    return 1;
  }

  // XXX - For some reason this isn't called for switch.
  if (Opcode == Instruction::Br || Opcode == Instruction::Ret)
    return 10;

  return BaseT::getCFInstrCost(Opcode, CostKind, I);
}


// Returns the cost of a vector insert or extract.
//
// Opcodes other than ExtractElement/InsertElement, and inserts/extracts whose
// element type is narrower than 32 bits, defer to the base implementation.
// For elements of 32 bits or more the cost is 0, except when Index is ~0u,
// which is charged 2.
InstructionCost R600TTIImpl::getVectorInstrCost(
    unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index,
    const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {
  const bool IsInsertOrExtract = Opcode == Instruction::ExtractElement ||
                                 Opcode == Instruction::InsertElement;
  if (!IsInsertOrExtract)
    return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1,
                                     VIC);

  Type *EltTy = cast<VectorType>(ValTy)->getElementType();
  unsigned EltBits = DL.getTypeSizeInBits(EltTy);
  if (EltBits < 32)
    return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1,
                                     VIC);

  // Extracts only read a subregister, so they are free. Inserts are treated
  // as free as well: scalarizing operations should carry no cost, and no copy
  // into a different register class is needed.

  // Dynamic indexing isn't free and is best avoided.
  if (Index == ~0u)
    return 2;
  return 0;
}


// Fills in the unrolling preferences UP for loop L. R600 adds no policy of
// its own here: the call is forwarded unchanged to CommonTTI.
void R600TTIImpl::getUnrollingPreferences(

    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,

    OptimizationRemarkEmitter *ORE) const {

  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);

}


// Fills in the peeling preferences PP for loop L. R600 adds no policy of its
// own here: the call is forwarded unchanged to CommonTTI.
void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,

                                        TTI::PeelingPreferences &PP) const {

  CommonTTI.getPeelingPreferences(L, SE, PP);

}


const
aarch64 promote const
Definition AArch64PromoteConstant.cpp:228

AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

AMDGPU.h

CostKind
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

R600Subtarget.h
AMDGPU R600 specific subclass of TargetSubtarget.

R600TargetTransformInfo.h
This file defines a TargetTransformInfoImplBase conforming object specific to the R600 target machine.

llvm::AMDGPUTargetMachine
Definition AMDGPUTargetMachine.h:30

llvm::BasicTTIImplBase< R600TTIImpl >::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition BasicTTIImpl.h:1383

llvm::BasicTTIImplBase< R600TTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition BasicTTIImpl.h:1441

llvm::BasicTTIImplBase< R600TTIImpl >::DL
const DataLayout & DL

llvm::ElementCount
Definition TypeSize.h:298

llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320

llvm::Function
Definition Function.h:65

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::Instruction
Definition Instruction.h:69

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition OptimizationRemarkEmitter.h:33

llvm::R600Subtarget
Definition R600Subtarget.h:29

llvm::R600TTIImpl::isLegalToVectorizeMemChain
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition R600TargetTransformInfo.cpp:65

llvm::R600TTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Definition R600TargetTransformInfo.cpp:111

llvm::R600TTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition R600TargetTransformInfo.cpp:137

llvm::R600TTIImpl::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
Definition R600TargetTransformInfo.cpp:80

llvm::R600TTIImpl::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const override
Definition R600TargetTransformInfo.cpp:45

llvm::R600TTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(ElementCount VF) const override
Definition R600TargetTransformInfo.cpp:86

llvm::R600TTIImpl::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
Definition R600TargetTransformInfo.cpp:47

llvm::R600TTIImpl::R600TTIImpl
R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
Definition R600TargetTransformInfo.cpp:26

llvm::R600TTIImpl::getHardwareNumberOfRegisters
unsigned getHardwareNumberOfRegisters(bool Vec) const
Definition R600TargetTransformInfo.cpp:31

llvm::R600TTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition R600TargetTransformInfo.cpp:95

llvm::R600TTIImpl::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
Definition R600TargetTransformInfo.cpp:74

llvm::R600TTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition R600TargetTransformInfo.cpp:143

llvm::R600TTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override
Definition R600TargetTransformInfo.cpp:41

llvm::R600TTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const override
Definition R600TargetTransformInfo.cpp:35

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:457

llvm::TargetTransformInfoImplBase::getDataLayout
virtual const DataLayout & getDataLayout() const
Definition TargetTransformInfoImpl.h:51

llvm::TargetTransformInfo::VectorInstrContext
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
Definition TargetTransformInfo.h:1073

llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition TargetTransformInfo.h:338

llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition TargetTransformInfo.h:341

llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition TargetTransformInfo.h:342

llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition TargetTransformInfo.h:1346

llvm::TypeSize
Definition TypeSize.h:332

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition AMDGPUAddrSpace.h:80

llvm::AMDGPUAS::PARAM_D_ADDRESS
@ PARAM_D_ADDRESS
end Internal address spaces.
Definition AMDGPUAddrSpace.h:55

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition AMDGPUAddrSpace.h:34

llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition AMDGPUAddrSpace.h:65

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition AMDGPUAddrSpace.h:36

llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition AMDGPUAddrSpace.h:57

llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition AMDGPUAddrSpace.h:37

llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition AMDGPUAddrSpace.h:33

llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition AMDGPUAddrSpace.h:38

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::TargetTransformInfo::PeelingPreferences
Definition TargetTransformInfo.h:769

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition TargetTransformInfo.h:639