LLVM 20.0.0git
|
#include "Target/AMDGPU/AMDGPUTargetTransformInfo.h"
Definition at line 63 of file AMDGPUTargetTransformInfo.h.
|
explicit |
Definition at line 301 of file AMDGPUTargetTransformInfo.cpp.
References F, and llvm::DenormalMode::getPreserveSign().
Definition at line 198 of file AMDGPUTargetTransformInfo.h.
References llvm::AMDGPU::addrspacesMayAlias().
Definition at line 1342 of file AMDGPUTargetTransformInfo.cpp.
References adjustInliningThresholdUsingCallee(), ArgAllocaCost, llvm::BasicTTIImplBase< GCNTTIImpl >::DL, and getCallArgsTotalAllocaSize().
Definition at line 1225 of file AMDGPUTargetTransformInfo.cpp.
References llvm::TargetLoweringBase::getTargetMachine(), InlineMaxBB, and llvm::SIModeRegisterDefaults::isInlineCompatible().
Definition at line 213 of file AMDGPUTargetTransformInfo.h.
References llvm::AMDGPUAS::LOCAL_ADDRESS, llvm::AMDGPUAS::PRIVATE_ADDRESS, and llvm::AMDGPUAS::REGION_ADDRESS.
bool GCNTTIImpl::canSimplifyLegacyMulToMul | ( | const Instruction & | I, |
const Value * | Op0, | ||
const Value * | Op1, | ||
InstCombiner & | IC | ||
) | const |
Definition at line 330 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::InstCombiner::getSimplifyQuery(), llvm::SimplifyQuery::getWithInstruction(), I, llvm::isKnownNeverInfOrNaN(), llvm::PatternMatch::m_FiniteNonZero(), and llvm::PatternMatch::match().
Referenced by instCombineIntrinsic().
bool GCNTTIImpl::collectFlatAddressOperands | ( | SmallVectorImpl< int > & | OpIndexes, |
Intrinsic::ID | IID | ||
) | const |
Definition at line 1063 of file AMDGPUTargetTransformInfo.cpp.
References llvm::SmallVectorTemplateBase< T, bool >::push_back().
InstructionCost GCNTTIImpl::getArithmeticInstrCost | ( | unsigned | Opcode, |
Type * | Ty, | ||
TTI::TargetCostKind | CostKind, | ||
TTI::OperandValueInfo | Op1Info = {TTI::OK_AnyValue, TTI::OP_None} , |
||
TTI::OperandValueInfo | Op2Info = {TTI::OK_AnyValue, TTI::OP_None} , |
||
ArrayRef< const Value * > | Args = {} , |
||
const Instruction * | CxtI = nullptr |
||
) |
Definition at line 546 of file AMDGPUTargetTransformInfo.cpp.
References llvm::ISD::ADD, llvm::ISD::AND, CostKind, llvm::FAdd, llvm::ISD::FADD, llvm::FPOpFusion::Fast, llvm::ISD::FDIV, llvm::ISD::FMUL, llvm::ISD::FNEG, llvm::ISD::FREM, llvm::ISD::FSUB, llvm::BasicTTIImplBase< GCNTTIImpl >::getArithmeticInstrCost(), llvm::TargetLoweringBase::getTargetMachine(), llvm::AMDGPUSubtarget::has16BitInsts(), llvm::Instruction::hasAllowContract(), llvm::Instruction::hasApproxFunc(), llvm::AMDGPUSubtarget::hasMadMacF32Insts(), llvm::Value::hasOneUse(), llvm::GCNSubtarget::hasPackedFP32Ops(), llvm::GCNSubtarget::hasUsableDivScaleConditionOutput(), llvm::TargetLoweringBase::InstructionOpcodeToISD(), llvm::AMDGPUTargetLowering::isFNegFree(), llvm::PatternMatch::m_FPOne(), llvm::PatternMatch::match(), llvm::ISD::MUL, llvm::TargetMachine::Options, Options, llvm::ISD::OR, llvm::ISD::SHL, llvm::ISD::SRA, llvm::ISD::SRL, llvm::ISD::SUB, llvm::TargetTransformInfo::TCC_Free, llvm::TargetOptions::UnsafeFPMath, llvm::Value::user_begin(), and llvm::ISD::XOR.
InstructionCost GCNTTIImpl::getArithmeticReductionCost | ( | unsigned | Opcode, |
VectorType * | Ty, | ||
std::optional< FastMathFlags > | FMF, | ||
TTI::TargetCostKind | CostKind | ||
) |
Definition at line 827 of file AMDGPUTargetTransformInfo.cpp.
References CostKind, llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::BasicTTIImplBase< GCNTTIImpl >::getArithmeticReductionCost(), llvm::EVT::getScalarSizeInBits(), llvm::TargetLoweringBase::getValueType(), llvm::AMDGPUSubtarget::hasVOP3PInsts(), and llvm::TargetTransformInfo::requiresOrderedReduction().
|
inlineoverridevirtual |
Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
Reimplemented from llvm::BasicTTIImplBase< GCNTTIImpl >.
Definition at line 268 of file AMDGPUTargetTransformInfo.h.
unsigned GCNTTIImpl::getCallerAllocaCost | ( | const CallBase * | CB, |
const AllocaInst * | AI | ||
) | const |
Definition at line 1353 of file AMDGPUTargetTransformInfo.cpp.
References ArgAllocaCost, ArgAllocaCutoff, llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::AllocaInst::getAllocatedType(), getCallArgsTotalAllocaSize(), llvm::CallBase::getCalledFunction(), getInliningThresholdMultiplier(), llvm::DataLayout::getTypeAllocSize(), and llvm::none_of().
InstructionCost GCNTTIImpl::getCFInstrCost | ( | unsigned | Opcode, |
TTI::TargetCostKind | CostKind, | ||
const Instruction * | I = nullptr |
||
) |
Definition at line 796 of file AMDGPUTargetTransformInfo.cpp.
References assert(), CostKind, llvm::BasicTTIImplBase< GCNTTIImpl >::getCFInstrCost(), I, llvm::TargetTransformInfo::TCK_CodeSize, and llvm::TargetTransformInfo::TCK_SizeAndLatency.
|
inline |
Definition at line 202 of file AMDGPUTargetTransformInfo.h.
References llvm::AMDGPUAS::FLAT_ADDRESS.
|
inline |
Definition at line 255 of file AMDGPUTargetTransformInfo.h.
int GCNTTIImpl::getInliningLastCallToStaticBonus | ( | ) | const |
Definition at line 1337 of file AMDGPUTargetTransformInfo.cpp.
References llvm::TargetTransformInfoImplBase::getInliningLastCallToStaticBonus(), and getInliningThresholdMultiplier().
|
inline |
Definition at line 251 of file AMDGPUTargetTransformInfo.h.
Referenced by getCallerAllocaCost(), and getInliningLastCallToStaticBonus().
InstructionCost GCNTTIImpl::getIntrinsicInstrCost | ( | const IntrinsicCostAttributes & | ICA, |
TTI::TargetCostKind | CostKind | ||
) |
Definition at line 727 of file AMDGPUTargetTransformInfo.cpp.
References llvm::any_of(), assert(), CostKind, llvm::IntrinsicCostAttributes::getID(), llvm::BasicTTIImplBase< GCNTTIImpl >::getIntrinsicInstrCost(), llvm::IntrinsicCostAttributes::getReturnType(), llvm::AMDGPUSubtarget::has16BitInsts(), llvm::AMDGPUSubtarget::hasFastFMAF32(), llvm::GCNSubtarget::hasPackedFP32Ops(), intrinsicHasPackedVectorBenefit(), and RetTy.
Definition at line 373 of file AMDGPUTargetTransformInfo.cpp.
References llvm::AMDGPUAS::BUFFER_FAT_POINTER, llvm::AMDGPUAS::BUFFER_RESOURCE, llvm::AMDGPUAS::BUFFER_STRIDED_POINTER, llvm::AMDGPUAS::CONSTANT_ADDRESS, llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT, llvm::GCNSubtarget::getMaxPrivateElementSize(), llvm::AMDGPUAS::GLOBAL_ADDRESS, and llvm::AMDGPUAS::PRIVATE_ADDRESS.
unsigned GCNTTIImpl::getLoadVectorFactor | ( | unsigned | VF, |
unsigned | LoadSize, | ||
unsigned | ChainSizeInBytes, | ||
VectorType * | VecTy | ||
) | const |
Definition at line 352 of file AMDGPUTargetTransformInfo.cpp.
References llvm::Type::getScalarSizeInBits().
Definition at line 344 of file AMDGPUTargetTransformInfo.cpp.
References llvm::AMDGPUSubtarget::has16BitInsts(), and llvm::GCNSubtarget::hasPackedFP32Ops().
unsigned GCNTTIImpl::getMaxInterleaveFactor | ( | ElementCount | VF | ) |
Definition at line 511 of file AMDGPUTargetTransformInfo.cpp.
References llvm::ElementCount::isScalar().
int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold | ( | ) | const |
Definition at line 415 of file AMDGPUTargetTransformInfo.cpp.
Type * GCNTTIImpl::getMemcpyLoopLoweringType | ( | LLVMContext & | Context, |
Value * | Length, | ||
unsigned | SrcAddrSpace, | ||
unsigned | DestAddrSpace, | ||
Align | SrcAlign, | ||
Align | DestAlign, | ||
std::optional< uint32_t > | AtomicElementSize | ||
) | const |
Definition at line 421 of file AMDGPUTargetTransformInfo.cpp.
References llvm::FixedVectorType::get(), llvm::Type::getInt16Ty(), llvm::Type::getInt32Ty(), llvm::Type::getIntNTy(), llvm::Length, llvm::AMDGPUAS::LOCAL_ADDRESS, MemcpyLoopUnroll, llvm::MinAlign(), and llvm::AMDGPUAS::REGION_ADDRESS.
void GCNTTIImpl::getMemcpyLoopResidualLoweringType | ( | SmallVectorImpl< Type * > & | OpsOut, |
LLVMContext & | Context, | ||
unsigned | RemainingBytes, | ||
unsigned | SrcAddrSpace, | ||
unsigned | DestAddrSpace, | ||
Align | SrcAlign, | ||
Align | DestAlign, | ||
std::optional< uint32_t > | AtomicCpySize | ||
) | const |
Definition at line 465 of file AMDGPUTargetTransformInfo.cpp.
References llvm::FixedVectorType::get(), llvm::Type::getInt16Ty(), llvm::Type::getInt32Ty(), llvm::Type::getInt64Ty(), llvm::Type::getInt8Ty(), llvm::TargetTransformInfoImplBase::getMemcpyLoopResidualLoweringType(), llvm::MinAlign(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().
InstructionCost GCNTTIImpl::getMinMaxReductionCost | ( | Intrinsic::ID | IID, |
VectorType * | Ty, | ||
FastMathFlags | FMF, | ||
TTI::TargetCostKind | CostKind | ||
) |
Definition at line 845 of file AMDGPUTargetTransformInfo.cpp.
References CostKind, llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::BasicTTIImplBase< GCNTTIImpl >::getMinMaxReductionCost(), llvm::EVT::getScalarSizeInBits(), llvm::TargetLoweringBase::getValueType(), and llvm::AMDGPUSubtarget::hasVOP3PInsts().
unsigned GCNTTIImpl::getMinVectorRegisterBitWidth | ( | ) | const |
Definition at line 340 of file AMDGPUTargetTransformInfo.cpp.
Definition at line 316 of file AMDGPUTargetTransformInfo.cpp.
void GCNTTIImpl::getPeelingPreferences | ( | Loop * | L, |
ScalarEvolution & | SE, | ||
TTI::PeelingPreferences & | PP | ||
) |
Definition at line 1400 of file AMDGPUTargetTransformInfo.cpp.
References llvm::AMDGPUTTIImpl::getPeelingPreferences().
|
inline |
Definition at line 116 of file AMDGPUTargetTransformInfo.h.
References assert(), llvm::isPowerOf2_32(), and llvm::TargetTransformInfo::PSK_FastHardware.
|
overridevirtual |
How much before a load we should place the prefetch instruction.
This is currently measured in number of IR instructions.
Reimplemented from llvm::BasicTTIImplBase< GCNTTIImpl >.
Definition at line 1426 of file AMDGPUTargetTransformInfo.cpp.
References llvm::GCNSubtarget::hasPrefetch().
TypeSize GCNTTIImpl::getRegisterBitWidth | ( | TargetTransformInfo::RegisterKind | Vector | ) | const |
Definition at line 328 of file AMDGPUTargetTransformInfo.cpp.
References llvm::TypeSize::getFixed(), llvm::TypeSize::getScalable(), llvm::GCNSubtarget::hasPackedFP32Ops(), llvm_unreachable, llvm::TargetTransformInfo::RGK_FixedWidthVector, llvm::TargetTransformInfo::RGK_ScalableVector, and llvm::TargetTransformInfo::RGK_Scalar.
InstructionCost GCNTTIImpl::getShuffleCost | ( | TTI::ShuffleKind | Kind, |
VectorType * | Tp, | ||
ArrayRef< int > | Mask, | ||
TTI::TargetCostKind | CostKind, | ||
int | Index, | ||
VectorType * | SubTp, | ||
ArrayRef< const Value * > | Args = {} , |
||
const Instruction * | CxtI = nullptr |
||
) |
Definition at line 1146 of file AMDGPUTargetTransformInfo.cpp.
References llvm::alignTo(), CostKind, llvm::count_if(), llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::VectorType::getElementType(), llvm::GCNSubtarget::getGeneration(), llvm::BasicTTIImplBase< GCNTTIImpl >::getShuffleCost(), llvm::DataLayout::getTypeSizeInBits(), llvm::AMDGPUSubtarget::hasVOP3PInsts(), llvm::BasicTTIImplBase< GCNTTIImpl >::improveShuffleKindFromMask(), llvm::TargetTransformInfo::SK_Broadcast, llvm::TargetTransformInfo::SK_ExtractSubvector, llvm::TargetTransformInfo::SK_InsertSubvector, llvm::TargetTransformInfo::SK_PermuteSingleSrc, llvm::TargetTransformInfo::SK_PermuteTwoSrc, llvm::TargetTransformInfo::SK_Reverse, llvm::TargetTransformInfo::SK_Select, llvm::TargetTransformInfo::SK_Splice, and llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS.
unsigned GCNTTIImpl::getStoreVectorFactor | ( | unsigned | VF, |
unsigned | StoreSize, | ||
unsigned | ChainSizeInBytes, | ||
VectorType * | VecTy | ||
) | const |
Definition at line 363 of file AMDGPUTargetTransformInfo.cpp.
bool GCNTTIImpl::getTgtMemIntrinsic | ( | IntrinsicInst * | Inst, |
MemIntrinsicInfo & | Info | ||
) | const |
Definition at line 520 of file AMDGPUTargetTransformInfo.cpp.
References llvm::CallBase::getArgOperand(), llvm::IntrinsicInst::getIntrinsicID(), Info, and llvm::SequentiallyConsistent.
void GCNTTIImpl::getUnrollingPreferences | ( | Loop * | L, |
ScalarEvolution & | SE, | ||
TTI::UnrollingPreferences & | UP, | ||
OptimizationRemarkEmitter * | ORE | ||
) |
Definition at line 1394 of file AMDGPUTargetTransformInfo.cpp.
References llvm::AMDGPUTTIImpl::getUnrollingPreferences().
InstructionCost GCNTTIImpl::getVectorInstrCost | ( | unsigned | Opcode, |
Type * | ValTy, | ||
TTI::TargetCostKind | CostKind, | ||
unsigned | Index, | ||
Value * | Op0, | ||
Value * | Op1 | ||
) |
Definition at line 859 of file AMDGPUTargetTransformInfo.cpp.
References CostKind, llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::DataLayout::getTypeSizeInBits(), llvm::BasicTTIImplBase< GCNTTIImpl >::getVectorInstrCost(), and llvm::AMDGPUSubtarget::has16BitInsts().
|
inline |
Definition at line 235 of file AMDGPUTargetTransformInfo.h.
Definition at line 312 of file AMDGPUTargetTransformInfo.cpp.
References F, and llvm::AMDGPUSubtarget::isSingleLaneExecution().
std::optional< Instruction * > GCNTTIImpl::instCombineIntrinsic | ( | InstCombiner & | IC, |
IntrinsicInst & | II | ||
) | const |
Definition at line 485 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::CallBase::addFnAttr(), llvm::FastMathFlags::allowContract(), assert(), llvm::APFloat::bitcastToAPInt(), llvm::InstCombiner::Builder, llvm::CallingConv::C, canContractSqrtToRsq(), canSimplifyLegacyMulToMul(), CC, llvm::ConstantFoldCompareInstOperands(), llvm::APFloat::convert(), llvm::IRBuilderBase::CreateAShr(), llvm::IRBuilderBase::CreateExtractVector(), llvm::IRBuilderBase::CreateFAddFMF(), llvm::IRBuilderBase::CreateFMulFMF(), llvm::IRBuilderBase::CreateICmpNE(), llvm::IRBuilderBase::CreateInsertElement(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::IRBuilderBase::CreateLShr(), llvm::IRBuilderBase::CreateMaxNum(), llvm::IRBuilderBase::CreateMinNum(), llvm::IRBuilderBase::CreateSExt(), llvm::IRBuilderBase::CreateShl(), llvm::IRBuilderBase::CreateZExt(), defaultComponentBroadcast(), llvm::APFloat::divide(), llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::InstCombiner::eraseInstFromFunction(), llvm::Exponent, llvm::FAdd, llvm::fcAllFlags, llvm::CmpInst::FIRST_FCMP_PREDICATE, llvm::CmpInst::FIRST_ICMP_PREDICATE, fmed3AMDGCN(), llvm::FMul, llvm::AMDGPU::MFMAScaleFormats::FP4_E2M1, llvm::AMDGPU::MFMAScaleFormats::FP6_E2M3, llvm::AMDGPU::MFMAScaleFormats::FP6_E3M2, llvm::AMDGPU::MFMAScaleFormats::FP8_E4M3, llvm::AMDGPU::MFMAScaleFormats::FP8_E5M2, llvm::frexp(), llvm::MDNode::get(), llvm::MetadataAsValue::get(), llvm::MDString::get(), llvm::FixedVectorType::get(), llvm::UndefValue::get(), llvm::PoisonValue::get(), llvm::IRBuilderBase::getContext(), llvm::ConstantInt::getFalse(), llvm::FPMathOperator::getFastMathFlags(), llvm::Type::getFltSemantics(), llvm::Type::getHalfTy(), llvm::AMDGPU::getImageDimIntrinsicInfo(), llvm::ConstantFP::getInfinity(), llvm::IRBuilderBase::getInt64(), llvm::Type::getIntegerBitWidth(), llvm::IRBuilderBase::getIntNTy(), llvm::CmpInst::getInversePredicate(), llvm::ConstantFP::getNaN(), llvm::Constant::getNullValue(), llvm::Intrinsic::getOrInsertDeclaration(), llvm::APFloat::getQNaN(), llvm::APFloat::getSemantics(), 
llvm::InstCombiner::getSimplifyQuery(), llvm::CmpInst::getSwappedPredicate(), llvm::Value::getType(), llvm::ConstantInt::getValue(), llvm::ConstantFP::getValueAPF(), llvm::AMDGPUSubtarget::getWavefrontSize(), llvm::APFloat::getZero(), llvm::ConstantFP::getZero(), llvm::APInt::getZExtValue(), llvm::ConstantInt::getZExtValue(), llvm::GCNSubtarget::hasDefaultComponentBroadcast(), llvm::GCNSubtarget::hasDefaultComponentZero(), llvm::GCNSubtarget::hasMed3_16(), I, llvm::CmpInst::ICMP_EQ, llvm::CmpInst::ICMP_NE, Idx, llvm::APFloatBase::IEEEhalf(), llvm::APFloatBase::IEK_Inf, llvm::APFloatBase::IEK_NaN, II, llvm::Type::isDoubleTy(), llvm::Type::isFloatTy(), llvm::CmpInst::isFPPredicate(), llvm::Type::isHalfTy(), llvm::Type::isIntegerTy(), llvm::APFloat::isNaN(), llvm::Constant::isNullValue(), llvm::CmpInst::isSigned(), isTriviallyUniform(), llvm::SimplifyQuery::isUndefValue(), llvm::GCNSubtarget::isWave32(), llvm::GCNSubtarget::isWaveSizeKnown(), llvm::CmpInst::LAST_FCMP_PREDICATE, llvm::CmpInst::LAST_ICMP_PREDICATE, llvm_unreachable, llvm::PatternMatch::m_AllOnes(), llvm::PatternMatch::m_AnyZeroFP(), llvm::PatternMatch::m_Cmp(), llvm::PatternMatch::m_ConstantFP(), llvm::PatternMatch::m_FPExt(), llvm::PatternMatch::m_NaN(), llvm::PatternMatch::m_One(), llvm::PatternMatch::m_SExt(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_Zero(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::m_ZExtOrSExt(), llvm::Make_64(), llvm::APFloat::makeQuiet(), llvm::PatternMatch::match(), matchFPExtFromF16(), llvm::NearestTiesToEven, llvm::Offset, llvm::InstCombiner::replaceInstUsesWith(), llvm::InstCombiner::replaceOperand(), llvm::APFloatBase::rmNearestTiesToEven, llvm::APFloatBase::rmTowardZero, llvm::scalbn(), Signed, simplifyAMDGCNImageIntrinsic(), simplifyAMDGCNMemoryIntrinsicDemanded(), simplifyDemandedLaneMaskArg(), std::swap(), llvm::Value::takeName(), trimTrailingZerosInVector(), llvm::APInt::trunc(), X, and Y.
Definition at line 992 of file AMDGPUTargetTransformInfo.cpp.
References llvm::CallingConv::C, llvm::computeKnownBits(), llvm::BasicTTIImplBase< GCNTTIImpl >::DL, F, llvm::ExtractValueInst::getIndices(), llvm::AMDGPUSubtarget::getMaxWorkitemID(), llvm::User::getOperand(), llvm::AMDGPUSubtarget::getWavefrontSizeLog2(), llvm::CallBase::isInlineAsm(), isInlineAsmSourceOfDivergence(), llvm::AMDGPU::isIntrinsicAlwaysUniform(), llvm::PatternMatch::m_AShr(), llvm::PatternMatch::m_c_And(), llvm::PatternMatch::m_ConstantInt(), llvm::PatternMatch::m_LShr(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and llvm::ArrayRef< T >::size().
bool GCNTTIImpl::isInlineAsmSourceOfDivergence | ( | const CallInst * | CI, |
ArrayRef< unsigned > | Indices = {} |
||
) | const |
Analyze if the results of inline asm are divergent.
If Indices is empty, this analyzes the collective result of all output registers. Otherwise, it queries only a specific result index, for the case where the inline asm returns multiple registers in a struct.
Definition at line 891 of file AMDGPUTargetTransformInfo.cpp.
References llvm::TargetLowering::ComputeConstraintToUse(), llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::ArrayRef< T >::empty(), llvm::Instruction::getDataLayout(), llvm::SITargetLowering::getRegForInlineAsmConstraint(), llvm::GCNSubtarget::getRegisterInfo(), llvm::InlineAsm::isOutput, llvm::TargetLowering::ParseConstraints(), llvm::ArrayRef< T >::size(), and TRI.
Referenced by isAlwaysUniform(), and isSourceOfDivergence().
bool GCNTTIImpl::isLegalToVectorizeLoadChain | ( | unsigned | ChainSizeInBytes, |
Align | Alignment, | ||
unsigned | AddrSpace | ||
) | const |
Definition at line 403 of file AMDGPUTargetTransformInfo.cpp.
References isLegalToVectorizeMemChain().
bool GCNTTIImpl::isLegalToVectorizeMemChain | ( | unsigned | ChainSizeInBytes, |
Align | Alignment, | ||
unsigned | AddrSpace | ||
) | const |
Definition at line 390 of file AMDGPUTargetTransformInfo.cpp.
References llvm::GCNSubtarget::getMaxPrivateElementSize(), llvm::GCNSubtarget::hasUnalignedScratchAccessEnabled(), and llvm::AMDGPUAS::PRIVATE_ADDRESS.
Referenced by isLegalToVectorizeLoadChain(), and isLegalToVectorizeStoreChain().
bool GCNTTIImpl::isLegalToVectorizeStoreChain | ( | unsigned | ChainSizeInBytes, |
Align | Alignment, | ||
unsigned | AddrSpace | ||
) | const |
Definition at line 409 of file AMDGPUTargetTransformInfo.cpp.
References isLegalToVectorizeMemChain().
bool GCNTTIImpl::isProfitableToSinkOperands | ( | Instruction * | I, |
SmallVectorImpl< Use * > & | Ops | ||
) | const |
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
This allows several modifiers (such as abs and neg) to be used more often.
Definition at line 1209 of file AMDGPUTargetTransformInfo.cpp.
References llvm::any_of(), llvm::SmallVectorBase< Size_T >::empty(), I, llvm::PatternMatch::m_FAbs(), llvm::PatternMatch::m_FNeg(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().
bool GCNTTIImpl::isReadRegisterSourceOfDivergence | ( | const IntrinsicInst * | ReadReg | ) | const |
Definition at line 927 of file AMDGPUTargetTransformInfo.cpp.
References llvm::CallBase::getArgOperand(), llvm::Value::getType(), llvm::MVT::getVT(), and RegName.
Referenced by isSourceOfDivergence().
Definition at line 950 of file AMDGPUTargetTransformInfo.cpp.
References A, llvm::AMDGPUAS::FLAT_ADDRESS, llvm::AMDGPU::isArgPassedInSGPR(), isInlineAsmSourceOfDivergence(), llvm::AMDGPU::isIntrinsicSourceOfDivergence(), isReadRegisterSourceOfDivergence(), and llvm::AMDGPUAS::PRIVATE_ADDRESS.
Value * GCNTTIImpl::rewriteIntrinsicWithAddressSpace | ( | IntrinsicInst * | II, |
Value * | OldV, | ||
Value * | NewV | ||
) | const |
Definition at line 1077 of file AMDGPUTargetTransformInfo.cpp.
References B, llvm::computeKnownBits(), llvm::KnownBits::countMinLeadingOnes(), llvm::BasicTTIImplBase< GCNTTIImpl >::DL, llvm::Type::getContext(), llvm::ConstantInt::getFalse(), llvm::Intrinsic::getOrInsertDeclaration(), llvm::Type::getPointerAddressSpace(), llvm::DataLayout::getPointerSizeInBits(), llvm::TargetLoweringBase::getTargetMachine(), llvm::ConstantInt::getTrue(), llvm::Value::getType(), II, llvm::AMDGPU::isExtendedGlobalAddrSpace(), llvm::AMDGPUAS::LOCAL_ADDRESS, and llvm::AMDGPUAS::PRIVATE_ADDRESS.
AS. Reimplemented from llvm::BasicTTIImplBase< GCNTTIImpl >.
Definition at line 1430 of file AMDGPUTargetTransformInfo.cpp.
References llvm::AMDGPU::isFlatGlobalAddrSpace().
bool GCNTTIImpl::simplifyDemandedLaneMaskArg | ( | InstCombiner & | IC, |
IntrinsicInst & | II, | ||
unsigned | LaneArgIdx | ||
) | const |
Simplify a lane index operand (e.g. llvm.amdgcn.readlane src1).
The instruction only reads the low 5 bits for wave32, and 6 bits for wave64.
Definition at line 456 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::KnownBits::getConstant(), llvm::Value::getType(), llvm::AMDGPUSubtarget::getWavefrontSizeLog2(), II, llvm::KnownBits::isConstant(), and llvm::InstCombiner::SimplifyDemandedBits().
Referenced by instCombineIntrinsic().
std::optional< Value * > GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic | ( | InstCombiner & | IC, |
IntrinsicInst & | II, | ||
APInt | DemandedElts, | ||
APInt & | UndefElts, | ||
APInt & | UndefElts2, | ||
APInt & | UndefElts3, | ||
std::function< void(Instruction *, unsigned, APInt, APInt &)> | SimplifyAndSetOp | ||
) | const |
Definition at line 1538 of file AMDGPUInstCombineIntrinsic.cpp.
References II, and simplifyAMDGCNMemoryIntrinsicDemanded().