|
LLVM 23.0.0git
|
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
#include "AMDGPUGenSearchableTables.inc"
Go to the source code of this file.
Macros | |
| #define | DEBUG_TYPE "AMDGPUtti" |
| #define | GET_AMDGPUImageDMaskIntrinsicTable_IMPL |
Functions | |
| static APFloat | fmed3AMDGCN (const APFloat &Src0, const APFloat &Src1, const APFloat &Src2) |
| static bool | canSafelyConvertTo16Bit (Value &V, bool IsFloat) |
| static Value * | convertTo16Bit (Value &V, InstCombiner::BuilderTy &Builder) |
| static std::optional< Instruction * > | modifyIntrinsicCall (IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func) |
| Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call. | |
| static std::optional< Instruction * > | simplifyAMDGCNImageIntrinsic (const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC) |
| static Value * | matchFPExtFromF16 (Value *Arg) |
| Match an fpext from half to float, or a constant we can convert. | |
| static APInt | trimTrailingZerosInVector (InstCombiner &IC, Value *UseV, Instruction *I) |
| static APInt | defaultComponentBroadcast (Value *V) |
| static Value * | simplifyAMDGCNMemoryIntrinsicDemanded (InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx, bool IsLoad) |
| Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics. | |
| static bool | canContractSqrtToRsq (const FPMathOperator *SqrtOp) |
| Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt) | |
| static bool | isTriviallyUniform (const Use &U) |
| Return true if we can easily prove that use U is uniform. | |
| static CallInst * | rewriteCall (IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops) |
| static bool | isThreadID (const GCNSubtarget &ST, Value *V) |
| static std::optional< unsigned > | evalLaneExpr (Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0) |
| Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a closed-form expression of the lane ID. | |
| static bool | tryBuildShuffleMap (Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL) |
| Build the per-lane shuffle map by evaluating Index for every lane in the wave. | |
| template<unsigned Period> | |
| static bool | hasPeriodicLayout (ArrayRef< uint8_t > Ids) |
| Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = Ids[I % Period] + (I & ~(Period - 1)). | |
| template<unsigned N> | |
| static bool | isRowPattern (ArrayRef< uint8_t > Ids) |
| Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row, and the pattern repeats periodically across rows. | |
| static std::optional< unsigned > | matchQuadPermPattern (ArrayRef< uint8_t > Ids) |
| Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids[0], [3:2]=Ids[1], [5:4]=Ids[2], [7:6]=Ids[3]. | |
| template<unsigned N> | |
| static bool | matchMirrorPattern (ArrayRef< uint8_t > Ids) |
| Match an N-lane reversal (mirror) pattern. | |
| static std::optional< unsigned > | matchRowRotatePattern (ArrayRef< uint8_t > Ids) |
| Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15]. | |
| static std::optional< unsigned > | matchRowSharePattern (ArrayRef< uint8_t > Ids) |
| Match a row-share pattern: all 16 lanes of each row read the same source lane. | |
| static std::optional< unsigned > | matchRowXMaskPattern (ArrayRef< uint8_t > Ids) |
| Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1, 15]. | |
| static std::optional< unsigned > | matchHalfRowPermPattern (ArrayRef< uint8_t > Ids) |
| Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per output lane). | |
| static uint64_t | computePermlane16Masks (ArrayRef< uint8_t > Ids) |
| Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4 + 3 : J*4]. | |
| static bool | matchHalfWaveSwapPattern (ArrayRef< uint8_t > Ids) |
| Match a half-wave swap: lane J reads from lane J ^ 32. | |
| static bool | isCrossRowPattern (ArrayRef< uint8_t > Ids) |
| Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads from the high half of its own row, and vice versa. | |
| static std::optional< unsigned > | matchDsSwizzleBitmaskPattern (ArrayRef< uint8_t > Ids) |
| Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask being five bits. | |
| static Value * | createUpdateDpp (IRBuilderBase &B, Value *Val, unsigned Ctrl) |
| Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds lanes are well-defined and the DPP mov can be folded into a consuming VALU op by GCNDPPCombine. | |
| static Value * | createMovDpp8 (IRBuilderBase &B, Value *Val, unsigned Selector) |
| Emit v_mov_b32_dpp8 with the given 24-bit lane selector. | |
| static Value * | createPermlane16 (IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi) |
| Emit v_permlane16 with the precomputed lane-select halves. | |
| static Value * | createPermlaneX16 (IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi) |
| Emit v_permlanex16 with the precomputed lane-select halves. | |
| static Value * | createDsSwizzle (IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL) |
| Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 as required by the intrinsic signature. | |
| static Value * | createPermlane64 (IRBuilderBase &B, Value *Val) |
| Emit v_permlane64 (swap of the two 32-lane halves of a wave64). | |
| static Value * | matchShuffleToHWIntrinsic (IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL) |
| Given a shuffle map, try to emit the best hardware intrinsic. | |
| static std::optional< Instruction * > | tryOptimizeShufflePattern (InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST) |
| Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a hardware-specific lane permutation intrinsic. | |
Variables | |
| static constexpr auto | isQuadPattern = isRowPattern<4> |
| static constexpr auto | isHalfRowPattern = isRowPattern<8> |
| static constexpr auto | isFullRowPattern = isRowPattern<16> |
| static constexpr auto | matchHalfRowMirrorPattern = matchMirrorPattern<8> |
| static constexpr auto | matchFullRowMirrorPattern = matchMirrorPattern<16> |
| #define DEBUG_TYPE "AMDGPUtti" |
Definition at line 36 of file AMDGPUInstCombineIntrinsic.cpp.
| #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL |
Definition at line 44 of file AMDGPUInstCombineIntrinsic.cpp.
|
static |
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
Definition at line 499 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::FPMathOperator::getFPAccuracy(), llvm::Value::getType(), llvm::FPMathOperator::hasApproxFunc(), llvm::Type::isFloatTy(), and llvm::Type::isHalfTy().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
Definition at line 74 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::APFloat::convert(), llvm::dyn_cast(), llvm::APInt::getActiveBits(), llvm::Value::getType(), llvm::APFloatBase::IEEEhalf(), llvm::Type::isHalfTy(), llvm::Type::isIntegerTy(), llvm::PatternMatch::m_FPExt(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmTowardZero.
Referenced by simplifyAMDGCNImageIntrinsic().
Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4 + 3 : J*4].
The caller splits it into the low and high 32-bit selector operands of v_permlane16 / v_permlanex16.
Definition at line 812 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchShuffleToHWIntrinsic().
Definition at line 112 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::cast(), llvm::Type::getHalfTy(), llvm::Type::getInt16Ty(), llvm::isa(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), and llvm_unreachable.
|
static |
Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 as required by the intrinsic signature.
Definition at line 922 of file AMDGPUInstCombineIntrinsic.cpp.
References assert(), B(), DL, llvm::Value::getType(), llvm::Type::isPointerTy(), and llvm::Offset.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
Definition at line 896 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), and llvm::Value::getType().
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_permlane16 with the precomputed lane-select halves.
Definition at line 902 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::PoisonValue::get(), llvm::Value::getType(), llvm::Hi, and llvm::Lo.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
Definition at line 943 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), and llvm::Value::getType().
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_permlanex16 with the precomputed lane-select halves.
Each output lane reads from the other 16-lane half of the same row.
Definition at line 912 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::PoisonValue::get(), llvm::Value::getType(), llvm::Hi, and llvm::Lo.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds lanes are well-defined and the DPP mov can be folded into a consuming VALU op by GCNDPPCombine.
Definition at line 888 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::PoisonValue::get(), and llvm::Value::getType().
Referenced by matchShuffleToHWIntrinsic().
Definition at line 465 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::cast(), llvm::APInt::clearBit(), llvm::dyn_cast(), llvm::findScalarElement(), llvm::APInt::getAllOnes(), I, llvm::isa(), and llvm::PoisonMaskElem.
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static |
Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a closed-form expression of the lane ID.
Definition at line 666 of file AMDGPUInstCombineIntrinsic.cpp.
References AbstractManglingParser< Derived, Alloc >::Ops, llvm::ConstantFoldInstOperands(), llvm::Depth, DL, llvm::dyn_cast(), llvm::dyn_cast_or_null(), evalLaneExpr(), llvm::User::getOperand(), llvm::Value::getType(), llvm::isa(), isThreadID(), LHS, llvm::MaxAnalysisRecursionDepth, and RHS.
Referenced by evalLaneExpr(), tryBuildShuffleMap(), and tryOptimizeShufflePattern().
Definition at line 53 of file AMDGPUInstCombineIntrinsic.cpp.
References assert(), llvm::APFloatBase::cmpEqual, llvm::APFloatBase::cmpUnordered, llvm::APFloat::compare(), and llvm::maxnum().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = Ids[I % Period] + (I & ~(Period - 1)).
Definition at line 723 of file AMDGPUInstCombineIntrinsic.cpp.
References E(), I, llvm::isPowerOf2_32(), and llvm::ArrayRef< T >::size().
Referenced by isCrossRowPattern(), isRowPattern(), and matchDsSwizzleBitmaskPattern().
Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads from the high half of its own row, and vice versa.
Definition at line 833 of file AMDGPUInstCombineIntrinsic.cpp.
References hasPeriodicLayout().
Referenced by matchShuffleToHWIntrinsic().
Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row, and the pattern repeats periodically across rows.
Definition at line 733 of file AMDGPUInstCombineIntrinsic.cpp.
References hasPeriodicLayout(), I, and N.
Referenced by matchMirrorPattern().
|
static |
Definition at line 565 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::PatternMatch::m_ConstantInt(), llvm::PatternMatch::m_Intrinsic(), and llvm::PatternMatch::match().
Referenced by evalLaneExpr().
Return true if we can easily prove that use U is uniform.
Definition at line 506 of file AMDGPUInstCombineIntrinsic.cpp.
References A(), llvm::cast(), llvm::dyn_cast(), II, llvm::isa(), llvm::AMDGPU::isArgPassedInSGPR(), and llvm::AMDGPU::isIntrinsicAlwaysUniform().
Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand(), and llvm::GCNTTIImpl::instCombineIntrinsic().
Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask being five bits.
Returns the encoded swizzle immediate. The hardware applies the formula independently within each 32-lane group, so on wave64 the high group must replicate the low one (translated by 32).
Definition at line 851 of file AMDGPUInstCombineIntrinsic.cpp.
References B(), llvm::AMDGPU::Swizzle::BITMASK_AND_SHIFT, llvm::AMDGPU::Swizzle::BITMASK_OR_SHIFT, llvm::AMDGPU::Swizzle::BITMASK_PERM_ENC, llvm::AMDGPU::Swizzle::BITMASK_XOR_SHIFT, hasPeriodicLayout(), I, and llvm::seq().
Referenced by matchShuffleToHWIntrinsic().
Match an fpext from half to float, or a constant we can convert.
Definition at line 421 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::APFloat::convert(), llvm::Value::getContext(), llvm::Type::getHalfTy(), llvm::ConstantFP::getValueAPF(), llvm::APFloatBase::IEEEhalf(), llvm::PatternMatch::m_ConstantFP(), llvm::PatternMatch::m_FPExt(), llvm::MIPatternMatch::m_OneUse(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmNearestTiesToEven.
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per output lane).
Definition at line 800 of file AMDGPUInstCombineIntrinsic.cpp.
References isHalfRowPattern.
Referenced by matchShuffleToHWIntrinsic().
Match a half-wave swap: lane J reads from lane J ^ 32.
Only meaningful on wave64 targets.
Definition at line 821 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::ArrayRef< T >::size().
Referenced by matchShuffleToHWIntrinsic().
Match an N-lane reversal (mirror) pattern.
Definition at line 754 of file AMDGPUInstCombineIntrinsic.cpp.
References isRowPattern(), and N.
Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids[0], [3:2]=Ids[1], [5:4]=Ids[2], [7:6]=Ids[3].
Definition at line 747 of file AMDGPUInstCombineIntrinsic.cpp.
References isQuadPattern.
Referenced by matchShuffleToHWIntrinsic().
Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
Definition at line 767 of file AMDGPUInstCombineIntrinsic.cpp.
References isFullRowPattern.
Referenced by matchShuffleToHWIntrinsic().
Match a row-share pattern: all 16 lanes of each row read the same source lane.
Returns the shared source lane index in [0, 16).
Definition at line 778 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::all_equal(), isFullRowPattern, and llvm::ArrayRef< T >::take_front().
Referenced by matchShuffleToHWIntrinsic().
Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1, 15].
Definition at line 788 of file AMDGPUInstCombineIntrinsic.cpp.
References isFullRowPattern.
Referenced by matchShuffleToHWIntrinsic().
|
static |
Given a shuffle map, try to emit the best hardware intrinsic.
Definition at line 949 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::all_equal(), B(), computePermlane16Masks(), createDsSwizzle(), createMovDpp8(), createPermlane16(), createPermlane64(), createPermlaneX16(), createUpdateDpp(), DL, llvm::Hi_32(), isCrossRowPattern(), isFullRowPattern, llvm::Lo_32(), matchDsSwizzleBitmaskPattern(), matchFullRowMirrorPattern, matchHalfRowMirrorPattern, matchHalfRowPermPattern(), matchHalfWaveSwapPattern(), matchQuadPermPattern(), matchRowRotatePattern(), matchRowSharePattern(), matchRowXMaskPattern(), llvm::AMDGPU::Swizzle::QUAD_PERM_ENC, llvm::AMDGPU::DPP::ROW_HALF_MIRROR, llvm::AMDGPU::DPP::ROW_MIRROR, llvm::AMDGPU::DPP::ROW_ROR_FIRST, llvm::AMDGPU::DPP::ROW_SHARE_FIRST, and llvm::AMDGPU::DPP::ROW_XMASK_FIRST.
Referenced by tryOptimizeShufflePattern().
|
static |
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.
Definition at line 127 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::CallBase::args(), llvm::InstCombiner::Builder, llvm::Instruction::copyFastMathFlags(), llvm::Instruction::copyMetadata(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::InstCombiner::eraseInstFromFunction(), llvm::CallBase::getCalledFunction(), llvm::Value::getType(), llvm::isa(), llvm::Intrinsic::isSignatureValid(), llvm::Type::isVoidTy(), llvm::InstCombiner::replaceInstUsesWith(), and llvm::Value::takeName().
Referenced by simplifyAMDGCNImageIntrinsic().
|
static |
Definition at line 553 of file AMDGPUInstCombineIntrinsic.cpp.
References AbstractManglingParser< Derived, Alloc >::Ops, B(), llvm::CallBase::getOperandBundlesAsDefs(), and llvm::Value::takeName().
Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand().
|
static |
Definition at line 161 of file AMDGPUInstCombineIntrinsic.cpp.
References assert(), llvm::AMDGPU::ImageDimIntrinsicInfo::BaseOpcode, llvm::AMDGPU::ImageDimIntrinsicInfo::BiasIndex, canSafelyConvertTo16Bit(), llvm::cast(), llvm::AMDGPU::ImageDimIntrinsicInfo::CoordStart, llvm::AMDGPU::ImageDimIntrinsicInfo::Dim, llvm::dyn_cast(), llvm::SmallVectorImpl< T >::emplace_back(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::InstCombiner::eraseInstFromFunction(), for(), llvm::Type::getHalfTy(), llvm::AMDGPU::getImageDimIntrinsicByBaseOpcode(), llvm::Type::getInt16Ty(), llvm::AMDGPU::getMIMGBaseOpcodeInfo(), llvm::AMDGPU::getMIMGBiasMappingInfo(), llvm::AMDGPU::getMIMGLZMappingInfo(), llvm::AMDGPU::getMIMGMIPMappingInfo(), llvm::AMDGPU::getMIMGOffsetMappingInfo(), llvm::Intrinsic::getOrInsertDeclaration(), llvm::Type::getScalarType(), llvm::Value::getType(), llvm::Type::getWithNewType(), llvm::AMDGPU::ImageDimIntrinsicInfo::GradientStart, llvm::AMDGPU::MIMGBaseOpcodeInfo::HasD16, if(), II, llvm::AMDGPU::ImageDimIntrinsicInfo::Intr, llvm::Type::isFloatingPointTy(), llvm::Type::isHalfTy(), llvm::Intrinsic::isSignatureValid(), llvm::AMDGPU::ImageDimIntrinsicInfo::LodIndex, llvm::AMDGPU::ImageDimIntrinsicInfo::MipIndex, modifyIntrinsicCall(), llvm::AMDGPU::ImageDimIntrinsicInfo::NumBiasArgs, llvm::AMDGPU::ImageDimIntrinsicInfo::OffsetIndex, llvm::AMDGPU::MIMGBaseOpcodeInfo::Sampler, llvm::Value::takeName(), and llvm::AMDGPU::ImageDimIntrinsicInfo::VAddrEnd.
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static |
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
The result of simplifying amdgcn image and buffer store intrinsics is updating definitions of the intrinsics vector argument, not Uses of the result like image and buffer loads. Note: This only supports non-TFE/LWE image intrinsic calls; those have struct returns.
Definition at line 2248 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::InstCombiner::Builder, llvm::cast(), llvm::Instruction::copyMetadata(), llvm::APInt::countr_zero(), llvm::IRBuilderBase::CreateAdd(), llvm::IRBuilderBase::CreateExtractElement(), llvm::IRBuilderBase::CreateInsertElement(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::IRBuilderBase::CreateShuffleVector(), llvm::FixedVectorType::get(), llvm::PoisonValue::get(), llvm::APInt::getActiveBits(), llvm::InstCombiner::getDataLayout(), llvm::Value::getType(), llvm::DataLayout::getTypeSizeInBits(), llvm::ConstantInt::getZExtValue(), II, llvm::APInt::isMask(), llvm::Intrinsic::isSignatureValid(), llvm::Offset, llvm::APInt::popcount(), llvm::popcount(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::IRBuilderBase::SetInsertPoint(), and llvm::Value::takeName().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic(), and llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic().
|
static |
Definition at line 439 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::cast(), llvm::APInt::clearBit(), llvm::dyn_cast(), llvm::findScalarElement(), llvm::APInt::getAllOnes(), llvm::Value::getType(), I, and llvm::isa().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static |
Build the per-lane shuffle map by evaluating Index for every lane in the wave.
Returns false if any lane index is non-constant or out of range.
Definition at line 706 of file AMDGPUInstCombineIntrinsic.cpp.
References DL, evalLaneExpr(), llvm::SmallVectorImpl< T >::resize(), and llvm::seq().
Referenced by tryOptimizeShufflePattern().
|
static |
Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a hardware-specific lane permutation intrinsic.
Definition at line 1016 of file AMDGPUInstCombineIntrinsic.cpp.
References llvm::InstCombiner::Builder, DL, evalLaneExpr(), llvm::InstCombiner::getDataLayout(), II, matchShuffleToHWIntrinsic(), llvm::InstCombiner::replaceInstUsesWith(), llvm::SmallVectorImpl< T >::resize(), llvm::seq(), and tryBuildShuffleMap().
Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().
|
static constexpr |
Definition at line 742 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchRowRotatePattern(), matchRowSharePattern(), matchRowXMaskPattern(), and matchShuffleToHWIntrinsic().
|
static constexpr |
Definition at line 741 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchHalfRowPermPattern().
|
static constexpr |
Definition at line 740 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchQuadPermPattern().
|
static constexpr |
Definition at line 764 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchShuffleToHWIntrinsic().
|
static constexpr |
Definition at line 763 of file AMDGPUInstCombineIntrinsic.cpp.
Referenced by matchShuffleToHWIntrinsic().