|
LLVM 23.0.0git
|
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
#include "AMDGPUGenSearchableTables.inc"
Go to the source code of this file.
Macros | |
| #define | DEBUG_TYPE "AMDGPUtti" |
| #define | GET_AMDGPUImageDMaskIntrinsicTable_IMPL |
Functions | |
| static APFloat | fmed3AMDGCN (const APFloat &Src0, const APFloat &Src1, const APFloat &Src2) |
| static bool | canSafelyConvertTo16Bit (Value &V, bool IsFloat) |
| static Value * | convertTo16Bit (Value &V, InstCombiner::BuilderTy &Builder) |
| static std::optional< Instruction * > | modifyIntrinsicCall (IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func) |
| Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call. | |
| static std::optional< Instruction * > | simplifyAMDGCNImageIntrinsic (const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC) |
| static Value * | matchFPExtFromF16 (Value *Arg) |
| Match an fpext from half to float, or a constant we can convert. | |
| static APInt | trimTrailingZerosInVector (InstCombiner &IC, Value *UseV, Instruction *I) |
| static APInt | defaultComponentBroadcast (Value *V) |
| static Value * | simplifyAMDGCNMemoryIntrinsicDemanded (InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx, bool IsLoad) |
| Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics. | |
| static bool | canContractSqrtToRsq (const FPMathOperator *SqrtOp) |
| Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt) to llvm.amdgcn.rsq. | |
| static bool | isTriviallyUniform (const Use &U) |
| Return true if we can easily prove that use U is uniform. | |
| static CallInst * | rewriteCall (IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops) |
| static bool | isThreadID (const GCNSubtarget &ST, Value *V) |
| static std::optional< unsigned > | evalLaneExpr (Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0) |
| Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a closed-form expression of the lane ID. | |
| static bool | tryBuildShuffleMap (Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL) |
| Build the per-lane shuffle map by evaluating Index for every lane in the wave. | |
| template<unsigned Period> | |
| static bool | hasPeriodicLayout (ArrayRef< uint8_t > Ids) |
| Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = Ids[I % Period] + (I & ~(Period - 1)). | |
| template<unsigned N> | |
| static bool | isRowPattern (ArrayRef< uint8_t > Ids) |
| Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row, and the pattern repeats periodically across rows. | |
| static std::optional< unsigned > | matchQuadPermPattern (ArrayRef< uint8_t > Ids) |
| Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids[0], [3:2]=Ids[1], [5:4]=Ids[2], [7:6]=Ids[3]. | |
| template<unsigned N> | |
| static bool | matchMirrorPattern (ArrayRef< uint8_t > Ids) |
| Match an N-lane reversal (mirror) pattern. | |
| static std::optional< unsigned > | matchRowRotatePattern (ArrayRef< uint8_t > Ids) |
| Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15]. | |
| static std::optional< unsigned > | matchRowSharePattern (ArrayRef< uint8_t > Ids) |
| Match a row-share pattern: all 16 lanes of each row read the same source lane. | |
| static std::optional< unsigned > | matchRowXMaskPattern (ArrayRef< uint8_t > Ids) |
| Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1, 15]. | |
| static std::optional< unsigned > | matchHalfRowPermPattern (ArrayRef< uint8_t > Ids) |
| Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per output lane). | |
| static uint64_t | computePermlane16Masks (ArrayRef< uint8_t > Ids) |
| Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4 + 3 : J*4]. | |
| static bool | matchHalfWaveSwapPattern (ArrayRef< uint8_t > Ids) |
| Match a half-wave swap: lane J reads from lane J ^ 32. | |
| static bool | isCrossRowPattern (ArrayRef< uint8_t > Ids) |
| Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads from the high half of its own row, and vice versa. | |
| static std::optional< unsigned > | matchDsSwizzleBitmaskPattern (ArrayRef< uint8_t > Ids) |
| Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask being five bits. | |
| static Value * | createUpdateDpp (IRBuilderBase &B, Value *Val, unsigned Ctrl) |
| Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds lanes are well-defined and the DPP mov can be folded into a consuming VALU op by GCNDPPCombine. | |
| static Value * | createMovDpp8 (IRBuilderBase &B, Value *Val, unsigned Selector) |
| Emit v_mov_b32_dpp8 with the given 24-bit lane selector. | |
| static Value * | createPermlane16 (IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi) |
| Emit v_permlane16 with the precomputed lane-select halves. | |
| static Value * | createPermlaneX16 (IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi) |
| Emit v_permlanex16 with the precomputed lane-select halves. | |
| static Value * | createDsSwizzle (IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL) |
| Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 as required by the intrinsic signature. | |
| static Value * | createPermlane64 (IRBuilderBase &B, Value *Val) |
| Emit v_permlane64 (swap of the two 32-lane halves of a wave64). | |
| static Value * | matchShuffleToHWIntrinsic (IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL) |
| Given a shuffle map, try to emit the best hardware intrinsic. | |
| static std::optional< Instruction * > | tryOptimizeShufflePattern (InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST) |
| Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a hardware-specific lane permutation intrinsic. | |
Variables | |
| static constexpr auto | isQuadPattern = isRowPattern<4> |
| static constexpr auto | isHalfRowPattern = isRowPattern<8> |
| static constexpr auto | isFullRowPattern = isRowPattern<16> |
| static constexpr auto | matchHalfRowMirrorPattern = matchMirrorPattern<8> |
| static constexpr auto | matchFullRowMirrorPattern = matchMirrorPattern<16> |
| #define DEBUG_TYPE "AMDGPUtti" |
Definition at line 36 of file AMDGPUInstCombineIntrinsic.cpp.
| #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL |
Definition at line 44 of file AMDGPUInstCombineIntrinsic.cpp.
|
static |
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt) to llvm.amdgcn.rsq.
Definition at line 500 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::FPMathOperator::getFPAccuracy(), llvm::Value::getType(), llvm::FPMathOperator::hasApproxFunc(), llvm::Type::isFloatTy(), and llvm::Type::isHalfTy().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
Definition at line 74 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::APFloat::convert(), llvm::dyn_cast(), llvm::APInt::getActiveBits(), llvm::Value::getType(), llvm::APFloatBase::IEEEhalf(), llvm::Type::isHalfTy(), llvm::Type::isIntegerTy(), llvm::PatternMatch::m_FPExt(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmTowardZero.
Referenced by simplifyAMDGCNImageIntrinsic().
Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4 + 3 : J*4].
The caller splits it into the low and high 32-bit selector operands of v_permlane16 / v_permlanex16.
Definition at line 813 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchShuffleToHWIntrinsic().
Definition at line 112 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::cast(), llvm::Type::getHalfTy(), llvm::Type::getInt16Ty(), llvm::isa(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), and llvm_unreachable.
|
static |
Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 as required by the intrinsic signature.
Definition at line 923 of file AMDGPUInstCombineIntrinsic.cpp.
References assert(), B(), DL, llvm::Value::getType(), llvm::Type::isPointerTy(), and llvm::Offset.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
Definition at line 897 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), and llvm::Value::getType().
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_permlane16 with the precomputed lane-select halves.
Definition at line 903 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::PoisonValue::get(), llvm::Value::getType(), llvm::Hi, and llvm::Lo.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
Definition at line 944 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), and llvm::Value::getType().
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_permlanex16 with the precomputed lane-select halves.
Each output lane reads from the other 16-lane half of the same row.
Definition at line 913 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::PoisonValue::get(), llvm::Value::getType(), llvm::Hi, and llvm::Lo.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds lanes are well-defined and the DPP mov can be folded into a consuming VALU op by GCNDPPCombine.
Definition at line 889 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::PoisonValue::get(), and llvm::Value::getType().
Referenced by matchShuffleToHWIntrinsic().
Definition at line 466 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::cast(), llvm::APInt::clearBit(), llvm::dyn_cast(), llvm::findScalarElement(), llvm::APInt::getAllOnes(), I, llvm::isa(), and llvm::PoisonMaskElem.
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static |
Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a closed-form expression of the lane ID.
Definition at line 667 of file AMDGPUInstCombineIntrinsic.cpp.
References AbstractManglingParser< Derived, Alloc >::Ops, llvm::ConstantFoldInstOperands(), llvm::Depth, DL, llvm::dyn_cast(), llvm::dyn_cast_or_null(), evalLaneExpr(), llvm::User::getOperand(), llvm::Value::getType(), llvm::isa(), isThreadID(), LHS, llvm::MaxAnalysisRecursionDepth, and RHS.
Referenced by evalLaneExpr(), tryBuildShuffleMap(), and tryOptimizeShufflePattern().
Definition at line 53 of file AMDGPUInstCombineIntrinsic.cpp.
References assert(), llvm::APFloatBase::cmpEqual, llvm::APFloatBase::cmpUnordered, llvm::APFloat::compare(), and llvm::maxnum().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = Ids[I % Period] + (I & ~(Period - 1)).
Definition at line 724 of file AMDGPUInstCombineIntrinsic.cpp.
References E(), I, llvm::isPowerOf2_32(), and llvm::ArrayRef< T >::size().
Referenced by isCrossRowPattern(), isRowPattern(), and matchDsSwizzleBitmaskPattern().
Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads from the high half of its own row, and vice versa.
Definition at line 834 of file AMDGPUInstCombineIntrinsic.cpp.
References hasPeriodicLayout().
Referenced by matchShuffleToHWIntrinsic().
Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row, and the pattern repeats periodically across rows.
Definition at line 734 of file AMDGPUInstCombineIntrinsic.cpp.
References hasPeriodicLayout(), I, and N.
Referenced by matchMirrorPattern().
|
static |
Definition at line 566 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::PatternMatch::m_ConstantInt(), llvm::PatternMatch::m_Intrinsic(), and llvm::PatternMatch::match().
Referenced by evalLaneExpr().
Return true if we can easily prove that use U is uniform.
Definition at line 507 of file AMDGPUInstCombineIntrinsic.cpp.
References A(), llvm::cast(), llvm::dyn_cast(), II, llvm::isa(), llvm::AMDGPU::isArgPassedInSGPR(), and llvm::AMDGPU::isIntrinsicAlwaysUniform().
Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand(), and llvm::GCNTTIImpl::instCombineIntrinsic().
Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask being five bits.
Returns the encoded swizzle immediate. The hardware applies the formula independently within each 32-lane group, so on wave64 the high group must replicate the low one (translated by 32).
Definition at line 852 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::AMDGPU::Swizzle::BITMASK_AND_SHIFT, llvm::AMDGPU::Swizzle::BITMASK_OR_SHIFT, llvm::AMDGPU::Swizzle::BITMASK_PERM_ENC, llvm::AMDGPU::Swizzle::BITMASK_XOR_SHIFT, hasPeriodicLayout(), I, and llvm::seq().
Referenced by matchShuffleToHWIntrinsic().
Match an fpext from half to float, or a constant we can convert.
Definition at line 422 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::APFloat::convert(), llvm::Value::getContext(), llvm::Type::getHalfTy(), llvm::ConstantFP::getValueAPF(), llvm::APFloatBase::IEEEhalf(), llvm::PatternMatch::m_ConstantFP(), llvm::PatternMatch::m_FPExt(), llvm::MIPatternMatch::m_OneUse(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmNearestTiesToEven.
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per output lane).
Definition at line 801 of file AMDGPUInstCombineIntrinsic.cpp.
References isHalfRowPattern.
Referenced by matchShuffleToHWIntrinsic().
Match a half-wave swap: lane J reads from lane J ^ 32.
Only meaningful on wave64 targets.
Definition at line 822 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::ArrayRef< T >::size().
Referenced by matchShuffleToHWIntrinsic().
Match an N-lane reversal (mirror) pattern.
Definition at line 755 of file AMDGPUInstCombineIntrinsic.cpp.
References isRowPattern(), and N.
Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids[0], [3:2]=Ids[1], [5:4]=Ids[2], [7:6]=Ids[3].
Definition at line 748 of file AMDGPUInstCombineIntrinsic.cpp.
References isQuadPattern.
Referenced by matchShuffleToHWIntrinsic().
Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
Definition at line 768 of file AMDGPUInstCombineIntrinsic.cpp.
References isFullRowPattern.
Referenced by matchShuffleToHWIntrinsic().
Match a row-share pattern: all 16 lanes of each row read the same source lane.
Returns the shared source lane index in [0, 16).
Definition at line 779 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::all_equal(), isFullRowPattern, and llvm::ArrayRef< T >::take_front().
Referenced by matchShuffleToHWIntrinsic().
Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1, 15].
Definition at line 789 of file AMDGPUInstCombineIntrinsic.cpp.
References isFullRowPattern.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Given a shuffle map, try to emit the best hardware intrinsic.
Definition at line 950 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::all_equal(), B(), computePermlane16Masks(), createDsSwizzle(), createMovDpp8(), createPermlane16(), createPermlane64(), createPermlaneX16(), createUpdateDpp(), DL, llvm::Hi_32(), isCrossRowPattern(), isFullRowPattern, llvm::Lo_32(), matchDsSwizzleBitmaskPattern(), matchFullRowMirrorPattern, matchHalfRowMirrorPattern, matchHalfRowPermPattern(), matchHalfWaveSwapPattern(), matchQuadPermPattern(), matchRowRotatePattern(), matchRowSharePattern(), matchRowXMaskPattern(), llvm::AMDGPU::Swizzle::QUAD_PERM_ENC, llvm::AMDGPU::DPP::ROW_HALF_MIRROR, llvm::AMDGPU::DPP::ROW_MIRROR, llvm::AMDGPU::DPP::ROW_ROR_FIRST, llvm::AMDGPU::DPP::ROW_SHARE_FIRST, and llvm::AMDGPU::DPP::ROW_XMASK_FIRST.
Referenced by tryOptimizeShufflePattern().
|
static |
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.
Definition at line 127 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::CallBase::args(), llvm::InstCombiner::Builder, llvm::Instruction::copyFastMathFlags(), llvm::Instruction::copyMetadata(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::InstCombiner::eraseInstFromFunction(), llvm::CallBase::getCalledFunction(), llvm::Value::getType(), llvm::isa(), llvm::Intrinsic::isSignatureValid(), llvm::Type::isVoidTy(), llvm::InstCombiner::replaceInstUsesWith(), and llvm::Value::takeName().
Referenced by simplifyAMDGCNImageIntrinsic().
|
static |
Definition at line 554 of file AMDGPUInstCombineIntrinsic.cpp.
References AbstractManglingParser< Derived, Alloc >::Ops, B(), llvm::CallBase::getOperandBundlesAsDefs(), and llvm::Value::takeName().
Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand().
|
static |
Definition at line 161 of file AMDGPUInstCombineIntrinsic.cpp.
References assert(), llvm::AMDGPU::ImageDimIntrinsicInfo::BaseOpcode, llvm::AMDGPU::ImageDimIntrinsicInfo::BiasIndex, canSafelyConvertTo16Bit(), llvm::cast(), llvm::AMDGPU::ImageDimIntrinsicInfo::CoordStart, llvm::AMDGPU::ImageDimIntrinsicInfo::Dim, llvm::dyn_cast(), llvm::SmallVectorImpl< T >::emplace_back(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::InstCombiner::eraseInstFromFunction(), for(), llvm::Type::getHalfTy(), llvm::AMDGPU::getImageDimIntrinsicByBaseOpcode(), llvm::Type::getInt16Ty(), llvm::AMDGPU::getMIMGBaseOpcodeInfo(), llvm::AMDGPU::getMIMGBiasMappingInfo(), llvm::AMDGPU::getMIMGLZMappingInfo(), llvm::AMDGPU::getMIMGMIPMappingInfo(), llvm::AMDGPU::getMIMGOffsetMappingInfo(), llvm::Intrinsic::getOrInsertDeclaration(), llvm::Type::getScalarType(), llvm::Value::getType(), llvm::Type::getWithNewType(), llvm::AMDGPU::ImageDimIntrinsicInfo::GradientStart, llvm::AMDGPU::MIMGBaseOpcodeInfo::HasD16, if(), II, llvm::AMDGPU::ImageDimIntrinsicInfo::Intr, llvm::Type::isFloatingPointTy(), llvm::Type::isHalfTy(), llvm::Intrinsic::isSignatureValid(), llvm::AMDGPU::ImageDimIntrinsicInfo::LodIndex, llvm::AMDGPU::ImageDimIntrinsicInfo::MipIndex, modifyIntrinsicCall(), llvm::AMDGPU::ImageDimIntrinsicInfo::NumBiasArgs, llvm::AMDGPU::ImageDimIntrinsicInfo::OffsetIndex, llvm::AMDGPU::MIMGBaseOpcodeInfo::Sampler, llvm::Value::takeName(), and llvm::AMDGPU::ImageDimIntrinsicInfo::VAddrEnd.
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static |
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
The result of simplifying amdgcn image and buffer store intrinsics is updating definitions of the intrinsics vector argument, not Uses of the result like image and buffer loads. Note: This only supports non-TFE/LWE image intrinsic calls; those have struct returns.
Definition at line 2249 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::InstCombiner::Builder, llvm::cast(), llvm::Instruction::copyMetadata(), llvm::APInt::countr_zero(), llvm::IRBuilderBase::CreateAdd(), llvm::IRBuilderBase::CreateExtractElement(), llvm::IRBuilderBase::CreateInsertElement(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::IRBuilderBase::CreateShuffleVector(), llvm::FixedVectorType::get(), llvm::PoisonValue::get(), llvm::APInt::getActiveBits(), llvm::InstCombiner::getDataLayout(), llvm::Value::getType(), llvm::DataLayout::getTypeSizeInBits(), llvm::ConstantInt::getZExtValue(), II, llvm::APInt::isMask(), llvm::Intrinsic::isSignatureValid(), llvm::Offset, llvm::APInt::popcount(), llvm::popcount(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::IRBuilderBase::SetInsertPoint(), and llvm::Value::takeName().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic(), and llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic().
|
static |
Definition at line 440 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::cast(), llvm::APInt::clearBit(), llvm::dyn_cast(), llvm::findScalarElement(), llvm::APInt::getAllOnes(), llvm::Value::getType(), I, and llvm::isa().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static |
Build the per-lane shuffle map by evaluating Index for every lane in the wave.
Returns false if any lane index is non-constant or out of range.
Definition at line 707 of file AMDGPUInstCombineIntrinsic.cpp.
References DL, evalLaneExpr(), llvm::SmallVectorImpl< T >::resize(), and llvm::seq().
Referenced by tryOptimizeShufflePattern().
|
static |
Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a hardware-specific lane permutation intrinsic.
Definition at line 1017 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::InstCombiner::Builder, DL, evalLaneExpr(), llvm::InstCombiner::getDataLayout(), II, matchShuffleToHWIntrinsic(), llvm::InstCombiner::replaceInstUsesWith(), llvm::SmallVectorImpl< T >::resize(), llvm::seq(), and tryBuildShuffleMap().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static constexpr |
Definition at line 743 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchRowRotatePattern(), matchRowSharePattern(), matchRowXMaskPattern(), and matchShuffleToHWIntrinsic().
|
static constexpr |
Definition at line 742 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchHalfRowPermPattern().
|
static constexpr |
Definition at line 741 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchQuadPermPattern().
|
static constexpr |
Definition at line 765 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchShuffleToHWIntrinsic().
|
static constexpr |
Definition at line 764 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchShuffleToHWIntrinsic().