#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
#include "AMDGPUGenSearchableTables.inc"

Macros
#define	DEBUG_TYPE "AMDGPUtti"
#define	GET_AMDGPUImageDMaskIntrinsicTable_IMPL

Functions
static APFloat	fmed3AMDGCN (const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static bool	canSafelyConvertTo16Bit (Value &V, bool IsFloat, bool AllowI16SExt=false)
static Value *	convertTo16Bit (Value &V, InstCombiner::BuilderTy &Builder)
static std::optional< Instruction * >	modifyIntrinsicCall (IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
	Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.
static std::optional< Instruction * >	simplifyAMDGCNImageIntrinsic (const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static Value *	matchFPExtFromF16 (Value *Arg)
	Match an fpext from half to float, or a constant we can convert.
static APInt	trimTrailingZerosInVector (InstCombiner &IC, Value *UseV, Instruction *I)
static APInt	defaultComponentBroadcast (Value *V)
static Value *	simplifyAMDGCNMemoryIntrinsicDemanded (InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx, bool IsLoad)
	Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static bool	canContractSqrtToRsq (const FPMathOperator *SqrtOp)
	Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool	isTriviallyUniform (const Use &U)
	Return true if we can easily prove that use U is uniform.
static CallInst *	rewriteCall (IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static bool	isThreadID (const GCNSubtarget &ST, Value *V)
static std::optional< unsigned >	evalLaneExpr (Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0)
	Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a closed-form expression of the lane ID.
static bool	tryBuildShuffleMap (Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL)
	Build the per-lane shuffle map by evaluating Index for every lane in the wave.
template<unsigned Period>
static bool	hasPeriodicLayout (ArrayRef< uint8_t > Ids)
	Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = Ids[I % Period] + (I & ~(Period - 1)).
template<unsigned N>
static bool	isRowPattern (ArrayRef< uint8_t > Ids)
	Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row, and the pattern repeats periodically across rows.
static std::optional< unsigned >	matchQuadPermPattern (ArrayRef< uint8_t > Ids)
	Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids[0], [3:2]=Ids[1], [5:4]=Ids[2], [7:6]=Ids[3].
template<unsigned N>
static bool	matchMirrorPattern (ArrayRef< uint8_t > Ids)
	Match an N-lane reversal (mirror) pattern.
static std::optional< unsigned >	matchRowRotatePattern (ArrayRef< uint8_t > Ids)
	Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
static std::optional< unsigned >	matchRowSharePattern (ArrayRef< uint8_t > Ids)
	Match a row-share pattern: all 16 lanes of each row read the same source lane.
static std::optional< unsigned >	matchRowXMaskPattern (ArrayRef< uint8_t > Ids)
	Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1, 15].
static std::optional< unsigned >	matchHalfRowPermPattern (ArrayRef< uint8_t > Ids)
	Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per output lane).
static uint64_t	computePermlane16Masks (ArrayRef< uint8_t > Ids)
	Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4 + 3 : J*4].
static bool	matchHalfWaveSwapPattern (ArrayRef< uint8_t > Ids)
	Match a half-wave swap: lane J reads from lane J ^ 32.
static bool	isCrossRowPattern (ArrayRef< uint8_t > Ids)
	Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads from the high half of its own row, and vice versa.
static std::optional< unsigned >	matchDsSwizzleBitmaskPattern (ArrayRef< uint8_t > Ids)
	Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask being five bits.
static std::optional< unsigned >	matchDsSwizzleRotatePattern (ArrayRef< uint8_t > Ids)
	Match a GFX9+ DS_SWIZZLE rotate-mode permutation: a cyclic left-rotation of all 32 lanes within each 32-lane group by a constant N in [0, 31], i.e. dst_lane = (src_lane + N) % 32.
static Value *	createUpdateDpp (IRBuilderBase &B, Value *Val, unsigned Ctrl)
	Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds lanes are well-defined and the DPP mov can be folded into a consuming VALU op by GCNDPPCombine.
static Value *	createMovDpp8 (IRBuilderBase &B, Value *Val, unsigned Selector)
	Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
static Value *	createPermlane16 (IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
	Emit v_permlane16 with the precomputed lane-select halves.
static Value *	createPermlaneX16 (IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
	Emit v_permlanex16 with the precomputed lane-select halves.
static Value *	createDsSwizzle (IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL)
	Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 as required by the intrinsic signature.
static Value *	createPermlane64 (IRBuilderBase &B, Value *Val)
	Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
static Value *	matchShuffleToHWIntrinsic (IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL)
	Given a shuffle map, try to emit the best hardware intrinsic.
static std::optional< Instruction * >	tryOptimizeShufflePattern (InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST)
	Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a hardware-specific lane permutation intrinsic.

Variables
static constexpr auto	isQuadPattern = isRowPattern<4>
static constexpr auto	isHalfRowPattern = isRowPattern<8>
static constexpr auto	isFullRowPattern = isRowPattern<16>
static constexpr auto	matchHalfRowMirrorPattern = matchMirrorPattern<8>
static constexpr auto	matchFullRowMirrorPattern = matchMirrorPattern<16>

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "AMDGPUtti"

Definition at line 36 of file AMDGPUInstCombineIntrinsic.cpp.

◆ GET_AMDGPUImageDMaskIntrinsicTable_IMPL

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL

Definition at line 44 of file AMDGPUInstCombineIntrinsic.cpp.

Function Documentation

◆ canContractSqrtToRsq()

bool canContractSqrtToRsq ( const FPMathOperator * SqrtOp )

static

Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)

Definition at line 526 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::FPMathOperator::getFPAccuracy(), llvm::Value::getType(), llvm::FPMathOperator::hasApproxFunc(), llvm::Type::isFloatTy(), and llvm::Type::isHalfTy().

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ canSafelyConvertTo16Bit()

bool canSafelyConvertTo16Bit	(	Value &	V,
		bool	IsFloat,
		bool	AllowI16SExt = false )

static

Definition at line 73 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::APFloat::convert(), llvm::dyn_cast(), llvm::APInt::getActiveBits(), llvm::Type::getScalarType(), llvm::Value::getType(), llvm::APFloatBase::IEEEhalf(), llvm::Type::isHalfTy(), llvm::Type::isIntegerTy(), llvm::PatternMatch::m_ExtractElt(), llvm::PatternMatch::m_FPExt(), llvm::PatternMatch::m_SExt(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmTowardZero.

Referenced by simplifyAMDGCNImageIntrinsic().

◆ computePermlane16Masks()

uint64_t computePermlane16Masks ( ArrayRef< uint8_t > Ids )

static

Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4 + 3 : J*4].

The caller splits it into the low and high 32-bit selector operands of v_permlane16 / v_permlanex16.

Definition at line 839 of file AMDGPUInstCombineIntrinsic.cpp.

Referenced by matchShuffleToHWIntrinsic().

◆ convertTo16Bit()

Value * convertTo16Bit	(	Value &	V,
		InstCombiner::BuilderTy &	Builder )

static

Definition at line 122 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::cast(), llvm::Type::getHalfTy(), llvm::Type::getInt16Ty(), llvm::User::getOperand(), llvm::isa(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), llvm_unreachable, llvm::PatternMatch::m_ExtractElt(), llvm::PatternMatch::m_Instruction(), llvm::PatternMatch::m_Value(), and llvm::PatternMatch::match().

◆ createDsSwizzle()

Value * createDsSwizzle	(	IRBuilderBase &	B,
		Value *	Val,
		unsigned	Offset,
		const DataLayout &	DL )

static

Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 as required by the intrinsic signature.

Definition at line 972 of file AMDGPUInstCombineIntrinsic.cpp.

References assert(), B(), DL, llvm::Value::getType(), llvm::Type::isPointerTy(), and llvm::Offset.

Referenced by matchShuffleToHWIntrinsic().

◆ createMovDpp8()

Value * createMovDpp8	(	IRBuilderBase &	B,
		Value *	Val,
		unsigned	Selector )

static

Emit v_mov_b32_dpp8 with the given 24-bit lane selector.

Definition at line 946 of file AMDGPUInstCombineIntrinsic.cpp.

References B(), and llvm::Value::getType().

Referenced by matchShuffleToHWIntrinsic().

◆ createPermlane16()

Value * createPermlane16	(	IRBuilderBase &	B,
		Value *	Val,
		uint32_t	Lo,
		uint32_t	Hi )

static

Emit v_permlane16 with the precomputed lane-select halves.

Definition at line 952 of file AMDGPUInstCombineIntrinsic.cpp.

References B(), llvm::PoisonValue::get(), llvm::Value::getType(), llvm::Hi, and llvm::Lo.

Referenced by matchShuffleToHWIntrinsic().

◆ createPermlane64()

Value * createPermlane64	(	IRBuilderBase &	B,
		Value *	Val )

static

Emit v_permlane64 (swap of the two 32-lane halves of a wave64).

Definition at line 993 of file AMDGPUInstCombineIntrinsic.cpp.

References B(), and llvm::Value::getType().

Referenced by matchShuffleToHWIntrinsic().

◆ createPermlaneX16()

Value * createPermlaneX16	(	IRBuilderBase &	B,
		Value *	Val,
		uint32_t	Lo,
		uint32_t	Hi )

static

Emit v_permlanex16 with the precomputed lane-select halves.

Each output lane reads from the other 16-lane half of the same row.

Definition at line 962 of file AMDGPUInstCombineIntrinsic.cpp.

References B(), llvm::PoisonValue::get(), llvm::Value::getType(), llvm::Hi, and llvm::Lo.

Referenced by matchShuffleToHWIntrinsic().

◆ createUpdateDpp()

Value * createUpdateDpp	(	IRBuilderBase &	B,
		Value *	Val,
		unsigned	Ctrl )

static

Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds lanes are well-defined and the DPP mov can be folded into a consuming VALU op by GCNDPPCombine.

Definition at line 938 of file AMDGPUInstCombineIntrinsic.cpp.

References B(), llvm::PoisonValue::get(), and llvm::Value::getType().

Referenced by matchShuffleToHWIntrinsic().

◆ defaultComponentBroadcast()

APInt defaultComponentBroadcast ( Value * V )

static

Definition at line 492 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::cast(), llvm::APInt::clearBit(), llvm::dyn_cast(), llvm::findScalarElement(), llvm::APInt::getAllOnes(), I, llvm::isa(), and llvm::PoisonMaskElem.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ evalLaneExpr()

std::optional< unsigned > evalLaneExpr	(	Value *	V,
		unsigned	Lane,
		const GCNSubtarget &	ST,
		const DataLayout &	DL,
		unsigned	Depth = 0 )

static

Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a closed-form expression of the lane ID.

Definition at line 693 of file AMDGPUInstCombineIntrinsic.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, llvm::ConstantFoldInstOperands(), llvm::Depth, DL, llvm::dyn_cast(), llvm::dyn_cast_or_null(), evalLaneExpr(), llvm::User::getOperand(), llvm::Value::getType(), llvm::isa(), isThreadID(), LHS, llvm::MaxAnalysisRecursionDepth, and RHS.

Referenced by evalLaneExpr(), tryBuildShuffleMap(), and tryOptimizeShufflePattern().

◆ fmed3AMDGCN()

APFloat fmed3AMDGCN	(	const APFloat &	Src0,
		const APFloat &	Src1,
		const APFloat &	Src2 )

static

Definition at line 53 of file AMDGPUInstCombineIntrinsic.cpp.

References assert(), llvm::APFloat::bitwiseIsEqual(), llvm::APFloat::isNaN(), and llvm::maxnum().

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ hasPeriodicLayout()

template<unsigned Period>

bool hasPeriodicLayout ( ArrayRef< uint8_t > Ids )

static

Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = Ids[I % Period] + (I & ~(Period - 1)).

Definition at line 750 of file AMDGPUInstCombineIntrinsic.cpp.

References E(), I, llvm::isPowerOf2_32(), and llvm::ArrayRef< T >::size().

Referenced by isCrossRowPattern(), isRowPattern(), matchDsSwizzleBitmaskPattern(), and matchDsSwizzleRotatePattern().

◆ isCrossRowPattern()

bool isCrossRowPattern ( ArrayRef< uint8_t > Ids )

static

Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads from the high half of its own row, and vice versa.

Definition at line 860 of file AMDGPUInstCombineIntrinsic.cpp.

References hasPeriodicLayout().

Referenced by matchShuffleToHWIntrinsic().

◆ isRowPattern()

template<unsigned N>

bool isRowPattern ( ArrayRef< uint8_t > Ids )

static

Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row, and the pattern repeats periodically across rows.

Definition at line 760 of file AMDGPUInstCombineIntrinsic.cpp.

References hasPeriodicLayout(), I, and N.

Referenced by matchMirrorPattern().

◆ isThreadID()

bool isThreadID	(	const GCNSubtarget &	ST,
		Value *	V )

static

Definition at line 592 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::PatternMatch::m_ConstantInt(), llvm::PatternMatch::m_Intrinsic(), and llvm::PatternMatch::match().

Referenced by evalLaneExpr().

◆ isTriviallyUniform()

bool isTriviallyUniform ( const Use & U )

static

Return true if we can easily prove that use U is uniform.

Definition at line 533 of file AMDGPUInstCombineIntrinsic.cpp.

References A(), llvm::cast(), llvm::dyn_cast(), II, llvm::isa(), llvm::AMDGPU::isArgPassedInSGPR(), and llvm::AMDGPU::isIntrinsicAlwaysUniform().

Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand(), and llvm::GCNTTIImpl::instCombineIntrinsic().

◆ matchDsSwizzleBitmaskPattern()

std::optional< unsigned > matchDsSwizzleBitmaskPattern ( ArrayRef< uint8_t > Ids )

static

Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask being five bits.

Returns the encoded swizzle immediate. The hardware applies the formula independently within each 32-lane group, so on wave64 the high group must replicate the low one (translated by 32).

Definition at line 878 of file AMDGPUInstCombineIntrinsic.cpp.

References B(), llvm::AMDGPU::Swizzle::BITMASK_AND_SHIFT, llvm::AMDGPU::Swizzle::BITMASK_OR_SHIFT, llvm::AMDGPU::Swizzle::BITMASK_PERM_ENC, llvm::AMDGPU::Swizzle::BITMASK_XOR_SHIFT, hasPeriodicLayout(), I, and llvm::seq().

Referenced by matchShuffleToHWIntrinsic().

◆ matchDsSwizzleRotatePattern()

std::optional< unsigned > matchDsSwizzleRotatePattern ( ArrayRef< uint8_t > Ids )

static

Match a GFX9+ DS_SWIZZLE rotate-mode permutation: a cyclic left-rotation of all 32 lanes within each 32-lane group by a constant N in [0, 31], i.e. dst_lane = (src_lane + N) % 32.

On wave64, hasPeriodicLayout<32> ensures both 32-lane groups rotate by the same amount.

Definition at line 917 of file AMDGPUInstCombineIntrinsic.cpp.

References hasPeriodicLayout(), I, N, llvm::AMDGPU::Swizzle::ROTATE_MODE_ENC, and llvm::AMDGPU::Swizzle::ROTATE_SIZE_SHIFT.

Referenced by matchShuffleToHWIntrinsic().

◆ matchFPExtFromF16()

Value * matchFPExtFromF16 ( Value * Arg )

static

Match an fpext from half to float, or a constant we can convert.

Definition at line 448 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::APFloat::convert(), llvm::Value::getContext(), llvm::Type::getHalfTy(), llvm::ConstantFP::getValueAPF(), llvm::APFloatBase::IEEEhalf(), llvm::PatternMatch::m_ConstantFP(), llvm::PatternMatch::m_FPExt(), llvm::MIPatternMatch::m_OneUse(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmNearestTiesToEven.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ matchHalfRowPermPattern()

std::optional< unsigned > matchHalfRowPermPattern ( ArrayRef< uint8_t > Ids )

static

Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per output lane).

Definition at line 827 of file AMDGPUInstCombineIntrinsic.cpp.

References isHalfRowPattern.

Referenced by matchShuffleToHWIntrinsic().

◆ matchHalfWaveSwapPattern()

bool matchHalfWaveSwapPattern ( ArrayRef< uint8_t > Ids )

static

Match a half-wave swap: lane J reads from lane J ^ 32.

Only meaningful on wave64 targets.

Definition at line 848 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::ArrayRef< T >::size().

Referenced by matchShuffleToHWIntrinsic().

◆ matchMirrorPattern()

template<unsigned N>

bool matchMirrorPattern ( ArrayRef< uint8_t > Ids )

static

Match an N-lane reversal (mirror) pattern.

Definition at line 781 of file AMDGPUInstCombineIntrinsic.cpp.

References isRowPattern(), and N.

◆ matchQuadPermPattern()

std::optional< unsigned > matchQuadPermPattern ( ArrayRef< uint8_t > Ids )

static

Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids[0], [3:2]=Ids[1], [5:4]=Ids[2], [7:6]=Ids[3].

Definition at line 774 of file AMDGPUInstCombineIntrinsic.cpp.

References isQuadPattern.

Referenced by matchShuffleToHWIntrinsic().

◆ matchRowRotatePattern()

std::optional< unsigned > matchRowRotatePattern ( ArrayRef< uint8_t > Ids )

static

Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].

Definition at line 794 of file AMDGPUInstCombineIntrinsic.cpp.

References isFullRowPattern.

Referenced by matchShuffleToHWIntrinsic().

◆ matchRowSharePattern()

std::optional< unsigned > matchRowSharePattern ( ArrayRef< uint8_t > Ids )

static

Match a row-share pattern: all 16 lanes of each row read the same source lane.

Returns the shared source lane index in [0, 16).

Definition at line 805 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::all_equal(), isFullRowPattern, and llvm::ArrayRef< T >::take_front().

Referenced by matchShuffleToHWIntrinsic().

◆ matchRowXMaskPattern()

std::optional< unsigned > matchRowXMaskPattern ( ArrayRef< uint8_t > Ids )

static

Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1, 15].

Definition at line 815 of file AMDGPUInstCombineIntrinsic.cpp.

References isFullRowPattern.

Referenced by matchShuffleToHWIntrinsic().

◆ matchShuffleToHWIntrinsic()

Value * matchShuffleToHWIntrinsic	(	IRBuilderBase &	B,
		Value *	Src,
		ArrayRef< uint8_t >	Ids,
		const GCNSubtarget &	ST,
		const DataLayout &	DL )

static

Given a shuffle map, try to emit the best hardware intrinsic.

Definition at line 999 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::all_equal(), llvm::all_of(), B(), computePermlane16Masks(), createDsSwizzle(), createMovDpp8(), createPermlane16(), createPermlane64(), createPermlaneX16(), createUpdateDpp(), DL, E(), llvm::enumerate(), llvm::Hi_32(), isCrossRowPattern(), isFullRowPattern, llvm::Lo_32(), matchDsSwizzleBitmaskPattern(), matchDsSwizzleRotatePattern(), matchFullRowMirrorPattern, matchHalfRowMirrorPattern, matchHalfRowPermPattern(), matchHalfWaveSwapPattern(), matchQuadPermPattern(), matchRowRotatePattern(), matchRowSharePattern(), matchRowXMaskPattern(), llvm::AMDGPU::Swizzle::QUAD_PERM_ENC, llvm::AMDGPU::DPP::ROW_HALF_MIRROR, llvm::AMDGPU::DPP::ROW_MIRROR, llvm::AMDGPU::DPP::ROW_ROR_FIRST, llvm::AMDGPU::DPP::ROW_SHARE_FIRST, and llvm::AMDGPU::DPP::ROW_XMASK_FIRST.

Referenced by tryOptimizeShufflePattern().

◆ modifyIntrinsicCall()

std::optional< Instruction * > modifyIntrinsicCall	(	IntrinsicInst &	OldIntr,
		Instruction &	InstToReplace,
		unsigned	NewIntr,
		InstCombiner &	IC,
		std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)>	Func )

static

Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.

Definition at line 144 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::CallBase::args(), llvm::InstCombiner::Builder, llvm::Instruction::copyFastMathFlags(), llvm::Instruction::copyMetadata(), llvm::IRBuilderBase::CreateIntrinsicWithoutFolding(), llvm::InstCombiner::eraseInstFromFunction(), llvm::CallBase::getAttributes(), llvm::CallBase::getCalledFunction(), llvm::Value::getType(), llvm::isa(), llvm::Intrinsic::isSignatureValid(), llvm::Type::isVoidTy(), llvm::InstCombiner::replaceInstUsesWith(), llvm::CallBase::setAttributes(), and llvm::Value::takeName().

Referenced by simplifyAMDGCNImageIntrinsic().

◆ rewriteCall()

CallInst * rewriteCall	(	IRBuilderBase &	B,
		CallInst &	Old,
		Function &	NewCallee,
		ArrayRef< Value * >	Ops )

static

Definition at line 580 of file AMDGPUInstCombineIntrinsic.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, B(), llvm::CallBase::getOperandBundlesAsDefs(), and llvm::Value::takeName().

Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand().

◆ simplifyAMDGCNImageIntrinsic()

std::optional< Instruction * > simplifyAMDGCNImageIntrinsic	(	const GCNSubtarget *	ST,
		const AMDGPU::ImageDimIntrinsicInfo *	ImageDimIntr,
		IntrinsicInst &	II,
		InstCombiner &	IC )

static

Definition at line 182 of file AMDGPUInstCombineIntrinsic.cpp.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ simplifyAMDGCNMemoryIntrinsicDemanded()

Value * simplifyAMDGCNMemoryIntrinsicDemanded	(	InstCombiner &	IC,
		IntrinsicInst &	II,
		APInt	DemandedElts,
		int	DMaskIdx,
		bool	IsLoad )

static

Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.

The result of simplifying amdgcn image and buffer store intrinsics is updating definitions of the intrinsics vector argument, not Uses of the result like image and buffer loads. Note: This only supports non-TFE/LWE image intrinsic calls; those have struct returns.

Definition at line 2303 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::InstCombiner::Builder, llvm::cast(), llvm::Instruction::copyMetadata(), llvm::APInt::countr_zero(), llvm::IRBuilderBase::CreateAdd(), llvm::IRBuilderBase::CreateExtractElement(), llvm::IRBuilderBase::CreateInsertElement(), llvm::IRBuilderBase::CreateIntrinsicWithoutFolding(), llvm::IRBuilderBase::CreateShuffleVector(), llvm::FixedVectorType::get(), llvm::PoisonValue::get(), llvm::APInt::getActiveBits(), llvm::InstCombiner::getDataLayout(), llvm::Value::getType(), llvm::DataLayout::getTypeSizeInBits(), llvm::ConstantInt::getZExtValue(), II, llvm::APInt::isMask(), llvm::Intrinsic::isSignatureValid(), llvm::Offset, llvm::APInt::popcount(), llvm::popcount(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::CallBase::setAttributes(), llvm::IRBuilderBase::SetInsertPoint(), and llvm::Value::takeName().