// X-macro list of the legalization choices: Legal, Discard and Convert.
// Each entry is expanded through whatever VPINTERNAL_CASE is defined as at the
// point of use (a "|Name" string fragment for the option help text, or a
// StringSwitch .Case mapping the name to the VPLegalization enumerator).
37#define VPINTERNAL_VPLEGAL_CASES \
38 VPINTERNAL_CASE(Legal) \
39 VPINTERNAL_CASE(Discard) \
40 VPINTERNAL_CASE(Convert)
42#define VPINTERNAL_CASE(X) "|" #X
48 ". If non-empty, ignore "
49 "TargetTransformInfo and "
50 "always use this transformation for the %evl parameter (Used in "
56 ". If non-empty, Ignore "
57 "TargetTransformInfo and "
58 "always use this transformation for the %mask parameter (Used in "
62#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
68#undef VPINTERNAL_VPLEGAL_CASES
75#define DEBUG_TYPE "expandvp"
// Counters declared through the STATISTIC macro.
77STATISTIC(NumFoldedVL,
"Number of folded vector length params");
// NOTE(review): the counter is named "lowered" but its description string says
// "folded" (same wording as NumFoldedVL above) — confirm which is intended
// before changing the emitted text.
78STATISTIC(NumLoweredVPOps,
"Number of folded vector predication operations");
85 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
86 return ConstValue->isAllOnesValue();
94 return ConstantInt::get(DivTy, 1u,
false);
99 auto *NewInst = dyn_cast<Instruction>(&NewVal);
100 if (!NewInst || !isa<FPMathOperator>(NewVal))
103 auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
107 NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
120 if (isa<VPReductionIntrinsic>(VPI))
125 .
hasFnAttr(Attribute::AttrKind::Speculatable);
136struct CachingVPExpander {
166 std::pair<Value *, bool> foldEVLIntoMask(
VPIntrinsic &VPI);
179 unsigned UnpredicatedIntrinsicID);
183 unsigned UnpredicatedIntrinsicID);
207 bool UsingTTIOverrides;
225 for (
unsigned Idx = 0;
Idx < NumElems; ++
Idx)
226 ConstElems.
push_back(ConstantInt::get(LaneTy,
Idx,
false));
238 Type *BoolVecTy = VectorType::get(Builder.
getInt1Ty(), ElemCount);
240 M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->
getType()});
243 return Builder.
CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
250 Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
255CachingVPExpander::expandPredicationInBinaryOperator(
IRBuilder<> &Builder,
258 "Implicitly dropping %evl in non-speculatable operator!");
275 case Instruction::UDiv:
276 case Instruction::SDiv:
277 case Instruction::URem:
278 case Instruction::SRem:
291Value *CachingVPExpander::expandPredicationToIntCall(
293 switch (UnpredicatedIntrinsicID) {
295 case Intrinsic::smax:
296 case Intrinsic::smin:
297 case Intrinsic::umax:
298 case Intrinsic::umin: {
302 VPI.
getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
307 case Intrinsic::bswap:
308 case Intrinsic::bitreverse: {
311 VPI.
getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
320Value *CachingVPExpander::expandPredicationToFPCall(
323 "Implicitly dropping %evl in non-speculatable operator!");
325 switch (UnpredicatedIntrinsicID) {
326 case Intrinsic::fabs:
327 case Intrinsic::sqrt: {
330 VPI.
getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
335 case Intrinsic::maxnum:
336 case Intrinsic::minnum: {
340 VPI.
getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
346 case Intrinsic::fmuladd:
347 case Intrinsic::experimental_constrained_fma:
348 case Intrinsic::experimental_constrained_fmuladd: {
353 VPI.
getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
370 bool Negative =
false;
376 case Intrinsic::vp_reduce_add:
377 case Intrinsic::vp_reduce_or:
378 case Intrinsic::vp_reduce_xor:
379 case Intrinsic::vp_reduce_umax:
381 case Intrinsic::vp_reduce_mul:
382 return ConstantInt::get(EltTy, 1,
false);
383 case Intrinsic::vp_reduce_and:
384 case Intrinsic::vp_reduce_umin:
385 return ConstantInt::getAllOnesValue(EltTy);
386 case Intrinsic::vp_reduce_smin:
389 case Intrinsic::vp_reduce_smax:
392 case Intrinsic::vp_reduce_fmax:
393 case Intrinsic::vp_reduce_fmaximum:
396 case Intrinsic::vp_reduce_fmin:
397 case Intrinsic::vp_reduce_fminimum: {
398 bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
399 VID == Intrinsic::vp_reduce_fmaximum;
402 return (!
Flags.noNaNs() && !PropagatesNaN)
407 APFloat::getLargest(Semantics, Negative));
409 case Intrinsic::vp_reduce_fadd:
411 case Intrinsic::vp_reduce_fmul:
412 return ConstantFP::get(EltTy, 1.0);
417CachingVPExpander::expandPredicationInReduction(
IRBuilder<> &Builder,
420 "Implicitly dropping %evl in non-speculatable operator!");
427 auto *NeutralElt = getNeutralReductionElement(VPI, VPI.
getType());
429 cast<VectorType>(RedOp->
getType())->getElementCount(), NeutralElt);
430 RedOp = Builder.
CreateSelect(Mask, RedOp, NeutralVector);
439 case Intrinsic::vp_reduce_add:
443 case Intrinsic::vp_reduce_mul:
447 case Intrinsic::vp_reduce_and:
451 case Intrinsic::vp_reduce_or:
455 case Intrinsic::vp_reduce_xor:
459 case Intrinsic::vp_reduce_smax:
464 case Intrinsic::vp_reduce_smin:
469 case Intrinsic::vp_reduce_umax:
474 case Intrinsic::vp_reduce_umin:
479 case Intrinsic::vp_reduce_fmax:
485 case Intrinsic::vp_reduce_fmin:
491 case Intrinsic::vp_reduce_fmaximum:
497 case Intrinsic::vp_reduce_fminimum:
503 case Intrinsic::vp_reduce_fadd:
506 case Intrinsic::vp_reduce_fmul:
515Value *CachingVPExpander::expandPredicationToCastIntrinsic(
IRBuilder<> &Builder,
517 Value *CastOp =
nullptr;
521 case Intrinsic::vp_sext:
525 case Intrinsic::vp_zext:
529 case Intrinsic::vp_trunc:
533 case Intrinsic::vp_inttoptr:
537 case Intrinsic::vp_ptrtoint:
541 case Intrinsic::vp_fptosi:
546 case Intrinsic::vp_fptoui:
550 case Intrinsic::vp_sitofp:
554 case Intrinsic::vp_uitofp:
558 case Intrinsic::vp_fptrunc:
562 case Intrinsic::vp_fpext:
572CachingVPExpander::expandPredicationInMemoryIntrinsic(
IRBuilder<> &Builder,
585 Value *NewMemoryInst =
nullptr;
589 case Intrinsic::vp_store:
593 if (AlignOpt.has_value())
595 NewMemoryInst = NewStore;
598 DataParam, PtrParam, AlignOpt.
valueOrOne(), MaskParam);
601 case Intrinsic::vp_load:
605 if (AlignOpt.has_value())
607 NewMemoryInst = NewLoad;
613 case Intrinsic::vp_scatter: {
615 cast<VectorType>(DataParam->
getType())->getElementType();
618 AlignOpt.value_or(
DL.getPrefTypeAlign(ElementType)), MaskParam);
621 case Intrinsic::vp_gather: {
625 AlignOpt.value_or(
DL.getPrefTypeAlign(ElementType)), MaskParam,
nullptr,
633 return NewMemoryInst;
636Value *CachingVPExpander::expandPredicationInComparison(
IRBuilder<> &Builder,
639 "Implicitly dropping %evl in non-speculatable operator!");
648 auto *NewCmp = Builder.
CreateCmp(Pred, Op0, Op1);
654bool CachingVPExpander::discardEVLParameter(
VPIntrinsic &VPI) {
665 Value *MaxEVL =
nullptr;
675 MaxEVL = Builder.
CreateMul(VScale, FactorConst,
"scalable_size",
678 MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.
getFixedValue(),
false);
684std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(
VPIntrinsic &VPI) {
691 return {&VPI,
false};
696 assert(OldMaskParam &&
"no mask param to fold the vl param into");
697 assert(OldEVLParam &&
"no EVL param to fold away");
704 Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
709 discardEVLParameter(VPI);
711 "transformation did not render the evl param ineffective!");
718 LLVM_DEBUG(
dbgs() <<
"Lowering to unpredicated op: " << VPI <<
'\n');
726 return expandPredicationInBinaryOperator(Builder, VPI);
728 if (
auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
729 return expandPredicationInReduction(Builder, *VPRI);
731 if (
auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
732 return expandPredicationInComparison(Builder, *VPCmp);
735 return expandPredicationToCastIntrinsic(Builder, VPI);
741 case Intrinsic::vp_fneg: {
746 case Intrinsic::vp_abs:
747 case Intrinsic::vp_smax:
748 case Intrinsic::vp_smin:
749 case Intrinsic::vp_umax:
750 case Intrinsic::vp_umin:
751 case Intrinsic::vp_bswap:
752 case Intrinsic::vp_bitreverse:
753 return expandPredicationToIntCall(Builder, VPI,
755 case Intrinsic::vp_fabs:
756 case Intrinsic::vp_sqrt:
757 case Intrinsic::vp_maxnum:
758 case Intrinsic::vp_minnum:
759 case Intrinsic::vp_maximum:
760 case Intrinsic::vp_minimum:
761 case Intrinsic::vp_fma:
762 case Intrinsic::vp_fmuladd:
763 return expandPredicationToFPCall(Builder, VPI,
765 case Intrinsic::vp_load:
766 case Intrinsic::vp_store:
767 case Intrinsic::vp_gather:
768 case Intrinsic::vp_scatter:
769 return expandPredicationInMemoryIntrinsic(Builder, VPI);
773 if (
Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
803CachingVPExpander::getVPLegalizationStrategy(
const VPIntrinsic &VPI)
const {
818CachingVPExpander::expandVectorPredication(
VPIntrinsic &VPI) {
819 auto Strategy = getVPLegalizationStrategy(VPI);
820 sanitizeStrategy(VPI, Strategy);
825 switch (Strategy.EVLParamStrategy) {
829 if (discardEVLParameter(VPI))
830 Changed = VPExpansionDetails::IntrinsicUpdated;
833 if (
auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) {
835 Changed = VPExpansionDetails::IntrinsicUpdated;
842 switch (Strategy.OpStrategy) {
848 if (
Value *V = expandPredication(VPI);
V != &VPI) {
850 Changed = VPExpansionDetails::IntrinsicReplaced;
862 return CachingVPExpander(
TTI).expandVectorPredication(VPI);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_LIKELY(EXPR)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static VPTransform parseOverrideOption(const std::string &TextOpt)
static cl::opt< std::string > MaskTransformOverride("expandvp-override-mask-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, Ignore " "TargetTransformInfo and " "always use this transformation for the %mask parameter (Used in " "testing)."))
static cl::opt< std::string > EVLTransformOverride("expandvp-override-evl-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, ignore " "TargetTransformInfo and " "always use this transformation for the %evl parameter (Used in " "testing)."))
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp)
Transfer all properties from OldOp to NewOp and replace all uses.
static bool isAllTrueMask(Value *MaskVal)
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI)
Transfer operation properties from OldVPI to NewVal.
static bool anyExpandVPOverridesSet()
static bool maySpeculateLanes(VPIntrinsic &VPI)
static Constant * getSafeDivisor(Type *DivTy)
#define VPINTERNAL_VPLEGAL_CASES
loop Loop Strength Reduction
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
@ ICMP_ULT
unsigned less than
ConstantFP - Floating Point Values [float, double].
static Constant * getNegativeZero(Type *Ty)
static Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
Convenience struct for specifying and reasoning about fast-math flags.
CallInst * CreateMulReduce(Value *Src)
Create a vector int mul reduction intrinsic of the source vector.
CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a sequential vector fadd reduction intrinsic of the source vector.
Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateAndReduce(Value *Src)
Create a vector int AND reduction intrinsic of the source vector.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
CallInst * CreateConstrainedFPCall(Function *Callee, ArrayRef< Value * > Args, const Twine &Name="", std::optional< RoundingMode > Rounding=std::nullopt, std::optional< fp::ExceptionBehavior > Except=std::nullopt)
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFPToUI(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateAddReduce(Value *Src)
Create a vector int add reduction intrinsic of the source vector.
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
BasicBlock * GetInsertBlock() const
CallInst * CreateXorReduce(Value *Src)
Create a vector int XOR reduction intrinsic of the source vector.
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
CallInst * CreateFPMinReduce(Value *Src)
Create a vector float min reduction intrinsic of the source vector.
CallInst * CreateFPMaximumReduce(Value *Src)
Create a vector float maximum reduction intrinsic of the source vector.
CallInst * CreateFPMaxReduce(Value *Src)
Create a vector float max reduction intrinsic of the source vector.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
CallInst * CreateIntMinReduce(Value *Src, bool IsSigned=false)
Create a vector integer min reduction intrinsic of the source vector.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a sequential vector fmul reduction intrinsic of the source vector.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
CallInst * CreateFPMinimumReduce(Value *Src)
Create a vector float minimum reduction intrinsic of the source vector.
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
void setAlignment(Align Align)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
A switch()-like statement whose cases are string literals.
The instances of the Type class are immutable: once they are created, they are never changed.
const fltSemantics & getFltSemantics() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt32Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
static bool isVPCast(Intrinsic::ID ID)
CmpInst::Predicate getPredicate() const
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
bool canIgnoreVectorLengthParam() const
void setMaskParam(Value *)
Value * getVectorLengthParam() const
void setVectorLengthParam(Value *)
Value * getMemoryDataParam() const
Value * getMemoryPointerParam() const
std::optional< unsigned > getConstrainedIntrinsicID() const
MaybeAlign getPointerAlignment() const
Value * getMaskParam() const
ElementCount getStaticVectorLength() const
std::optional< unsigned > getFunctionalOpcode() const
This represents vector predication reduction intrinsics.
unsigned getStartParamPos() const
unsigned getVectorParamPos() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
bool isConstrainedFPIntrinsic(ID QID)
Returns true if the intrinsic ID is for one of the "Constrained Floating-Point Intrinsics".
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
initializer< Ty > init(const Ty &Val)
ElementType
The element type of an SRV or UAV resource.
This is an optimization pass for GlobalISel generic memory operations.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
VPExpansionDetails expandVectorPredicationIntrinsic(VPIntrinsic &VPI, const TargetTransformInfo &TTI)
Expand a vector predication intrinsic.
VPExpansionDetails
Represents the details of the expansion of a VP intrinsic.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.