// X-macro list of the legalization strategy names. It invokes
// VPINTERNAL_CASE once for each of Legal, Discard and Convert, so the result
// depends on how VPINTERNAL_CASE is defined at the point of expansion.
37#define VPINTERNAL_VPLEGAL_CASES \
38 VPINTERNAL_CASE(Legal) \
39 VPINTERNAL_CASE(Discard) \
40 VPINTERNAL_CASE(Convert)
// String form of VPINTERNAL_CASE: each case becomes the literal "|" followed
// by the stringized case name, so adjacent-literal concatenation yields an
// option list for help text.
42#define VPINTERNAL_CASE(X) "|" #X
48 ". If non-empty, ignore "
49 "TargetTransformInfo and "
50 "always use this transformation for the %evl parameter (Used in "
56 ". If non-empty, Ignore "
57 "TargetTransformInfo and "
58 "always use this transformation for the %mask parameter (Used in "
// Parser form of VPINTERNAL_CASE: each case becomes a `.Case(...)` call that
// pairs the stringized name with the VPLegalization enumerator of the same
// name. NOTE(review): presumably chained onto a StringSwitch in
// parseOverrideOption; the call site is not visible here, so confirm.
62#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
68#undef VPINTERNAL_VPLEGAL_CASES
// Category name for this file.
// NOTE(review): presumably consumed by the STATISTIC and LLVM_DEBUG uses
// below; confirm against the LLVM Debug/Statistic headers.
75#define DEBUG_TYPE "expandvp"
// Statistic counter NumFoldedVL, reported with the description
// "Number of folded vector length params".
77STATISTIC(NumFoldedVL,
"Number of folded vector length params");
// Statistic counter NumLoweredVPOps. The description says "lowered" so that
// it matches the counter name and is distinguishable from NumFoldedVL in the
// statistics report.
// NOTE(review): the increment site is not visible here; confirm that this
// counter tracks VP operations lowered to unpredicated code.
78STATISTIC(NumLoweredVPOps,
"Number of lowered vector predication operations");
85 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
86 return ConstValue->isAllOnesValue();
94 return ConstantInt::get(DivTy, 1u,
false);
99 auto *NewInst = dyn_cast<Instruction>(&NewVal);
100 if (!NewInst || !isa<FPMathOperator>(NewVal))
103 auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
107 NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
120 if (isa<VPReductionIntrinsic>(VPI))
125 .
hasFnAttr(Attribute::AttrKind::Speculatable);
136struct CachingVPExpander {
166 std::pair<Value *, bool> foldEVLIntoMask(
VPIntrinsic &VPI);
182 unsigned UnpredicatedIntrinsicID);
206 bool UsingTTIOverrides;
224 for (
unsigned Idx = 0;
Idx < NumElems; ++
Idx)
225 ConstElems.
push_back(ConstantInt::get(LaneTy,
Idx,
false));
236 Type *BoolVecTy = VectorType::get(Builder.
getInt1Ty(), ElemCount);
240 {BoolVecTy, EVLParam->
getType()},
241 {ConstZero, EVLParam});
248 Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
253CachingVPExpander::expandPredicationInBinaryOperator(
IRBuilder<> &Builder,
256 "Implicitly dropping %evl in non-speculatable operator!");
273 case Instruction::UDiv:
274 case Instruction::SDiv:
275 case Instruction::URem:
276 case Instruction::SRem:
304Value *CachingVPExpander::expandPredicationToFPCall(
307 "Implicitly dropping %evl in non-speculatable operator!");
309 switch (UnpredicatedIntrinsicID) {
310 case Intrinsic::fabs:
311 case Intrinsic::sqrt:
312 case Intrinsic::maxnum:
313 case Intrinsic::minnum: {
325 case Intrinsic::fmuladd:
326 case Intrinsic::experimental_constrained_fma:
327 case Intrinsic::experimental_constrained_fmuladd: {
332 VPI.
getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
351 if (isa<FPMathOperator>(VPI))
357CachingVPExpander::expandPredicationInReduction(
IRBuilder<> &Builder,
360 "Implicitly dropping %evl in non-speculatable operator!");
367 auto *NeutralElt = getNeutralReductionElement(VPI, VPI.
getType());
369 cast<VectorType>(RedOp->
getType())->getElementCount(), NeutralElt);
370 RedOp = Builder.
CreateSelect(Mask, RedOp, NeutralVector);
379 case Intrinsic::vp_reduce_add:
380 case Intrinsic::vp_reduce_mul:
381 case Intrinsic::vp_reduce_and:
382 case Intrinsic::vp_reduce_or:
383 case Intrinsic::vp_reduce_xor: {
392 case Intrinsic::vp_reduce_smax:
393 case Intrinsic::vp_reduce_smin:
394 case Intrinsic::vp_reduce_umax:
395 case Intrinsic::vp_reduce_umin:
396 case Intrinsic::vp_reduce_fmax:
397 case Intrinsic::vp_reduce_fmin:
398 case Intrinsic::vp_reduce_fmaximum:
399 case Intrinsic::vp_reduce_fminimum: {
407 case Intrinsic::vp_reduce_fadd:
410 case Intrinsic::vp_reduce_fmul:
419Value *CachingVPExpander::expandPredicationToCastIntrinsic(
IRBuilder<> &Builder,
433CachingVPExpander::expandPredicationInMemoryIntrinsic(
IRBuilder<> &Builder,
446 Value *NewMemoryInst =
nullptr;
450 case Intrinsic::vp_store:
454 if (AlignOpt.has_value())
456 NewMemoryInst = NewStore;
459 DataParam, PtrParam, AlignOpt.
valueOrOne(), MaskParam);
462 case Intrinsic::vp_load:
466 if (AlignOpt.has_value())
468 NewMemoryInst = NewLoad;
474 case Intrinsic::vp_scatter: {
476 cast<VectorType>(DataParam->
getType())->getElementType();
479 AlignOpt.value_or(
DL.getPrefTypeAlign(ElementType)), MaskParam);
482 case Intrinsic::vp_gather: {
486 AlignOpt.value_or(
DL.getPrefTypeAlign(ElementType)), MaskParam,
nullptr,
494 return NewMemoryInst;
497Value *CachingVPExpander::expandPredicationInComparison(
IRBuilder<> &Builder,
500 "Implicitly dropping %evl in non-speculatable operator!");
509 auto *NewCmp = Builder.
CreateCmp(Pred, Op0, Op1);
515bool CachingVPExpander::discardEVLParameter(
VPIntrinsic &VPI) {
526 Value *MaxEVL =
nullptr;
534 MaxEVL = Builder.
CreateMul(VScale, FactorConst,
"scalable_size",
537 MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.
getFixedValue(),
false);
543std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(
VPIntrinsic &VPI) {
550 return {&VPI,
false};
555 assert(OldMaskParam &&
"no mask param to fold the vl param into");
556 assert(OldEVLParam &&
"no EVL param to fold away");
563 Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
568 discardEVLParameter(VPI);
570 "transformation did not render the evl param ineffective!");
577 LLVM_DEBUG(
dbgs() <<
"Lowering to unpredicated op: " << VPI <<
'\n');
585 return expandPredicationInBinaryOperator(Builder, VPI);
587 if (
auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
588 return expandPredicationInReduction(Builder, *VPRI);
590 if (
auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
591 return expandPredicationInComparison(Builder, *VPCmp);
594 return expandPredicationToCastIntrinsic(Builder, VPI);
600 case Intrinsic::vp_fneg: {
605 case Intrinsic::vp_abs:
606 case Intrinsic::vp_smax:
607 case Intrinsic::vp_smin:
608 case Intrinsic::vp_umax:
609 case Intrinsic::vp_umin:
610 case Intrinsic::vp_bswap:
611 case Intrinsic::vp_bitreverse:
612 return expandPredicationToIntCall(Builder, VPI);
613 case Intrinsic::vp_fabs:
614 case Intrinsic::vp_sqrt:
615 case Intrinsic::vp_maxnum:
616 case Intrinsic::vp_minnum:
617 case Intrinsic::vp_maximum:
618 case Intrinsic::vp_minimum:
619 case Intrinsic::vp_fma:
620 case Intrinsic::vp_fmuladd:
621 return expandPredicationToFPCall(Builder, VPI,
623 case Intrinsic::vp_load:
624 case Intrinsic::vp_store:
625 case Intrinsic::vp_gather:
626 case Intrinsic::vp_scatter:
627 return expandPredicationInMemoryIntrinsic(Builder, VPI);
631 if (
Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
661CachingVPExpander::getVPLegalizationStrategy(
const VPIntrinsic &VPI)
const {
676CachingVPExpander::expandVectorPredication(
VPIntrinsic &VPI) {
677 auto Strategy = getVPLegalizationStrategy(VPI);
678 sanitizeStrategy(VPI, Strategy);
683 switch (Strategy.EVLParamStrategy) {
687 if (discardEVLParameter(VPI))
688 Changed = VPExpansionDetails::IntrinsicUpdated;
691 if (
auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) {
693 Changed = VPExpansionDetails::IntrinsicUpdated;
700 switch (Strategy.OpStrategy) {
706 if (
Value *V = expandPredication(VPI);
V != &VPI) {
708 Changed = VPExpansionDetails::IntrinsicReplaced;
720 return CachingVPExpander(
TTI).expandVectorPredication(VPI);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_LIKELY(EXPR)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static VPTransform parseOverrideOption(const std::string &TextOpt)
static cl::opt< std::string > MaskTransformOverride("expandvp-override-mask-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, Ignore " "TargetTransformInfo and " "always use this transformation for the %mask parameter (Used in " "testing)."))
static cl::opt< std::string > EVLTransformOverride("expandvp-override-evl-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, ignore " "TargetTransformInfo and " "always use this transformation for the %evl parameter (Used in " "testing)."))
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp)
Transfer all properties from OldOp to NewOp and replace all uses.
static bool isAllTrueMask(Value *MaskVal)
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI)
Transfer operation properties from OldVPI to NewVal.
static bool anyExpandVPOverridesSet()
static bool maySpeculateLanes(VPIntrinsic &VPI)
static Constant * getSafeDivisor(Type *DivTy)
#define VPINTERNAL_VPLEGAL_CASES
loop Loop Strength Reduction
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This class represents an incoming formal argument to a Function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
@ ICMP_ULT
unsigned less than
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Convenience struct for specifying and reasoning about fast-math flags.
CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a sequential vector fadd reduction intrinsic of the source vector.
Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
CallInst * CreateConstrainedFPCall(Function *Callee, ArrayRef< Value * > Args, const Twine &Name="", std::optional< RoundingMode > Rounding=std::nullopt, std::optional< fp::ExceptionBehavior > Except=std::nullopt)
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a sequential vector fmul reduction intrinsic of the source vector.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
void setAlignment(Align Align)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
A switch()-like statement whose cases are string literals.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
static IntegerType * getInt32Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
static bool isVPCast(Intrinsic::ID ID)
CmpInst::Predicate getPredicate() const
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
bool canIgnoreVectorLengthParam() const
void setMaskParam(Value *)
static std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
Value * getVectorLengthParam() const
void setVectorLengthParam(Value *)
Value * getMemoryDataParam() const
Value * getMemoryPointerParam() const
std::optional< unsigned > getConstrainedIntrinsicID() const
MaybeAlign getPointerAlignment() const
Value * getMaskParam() const
ElementCount getStaticVectorLength() const
std::optional< unsigned > getFunctionalOpcode() const
This represents vector predication reduction intrinsics.
unsigned getStartParamPos() const
unsigned getVectorParamPos() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
bool isConstrainedFPIntrinsic(ID QID)
Returns true if the intrinsic ID is for one of the "Constrained Floating-Point Intrinsics".
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
initializer< Ty > init(const Ty &Val)
ElementType
The element type of an SRV or UAV resource.
This is an optimization pass for GlobalISel generic memory operations.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Value * getReductionIdentity(Intrinsic::ID RdxID, Type *Ty, FastMathFlags FMF)
Given information about an @llvm.vector.reduce.* intrinsic, return the identity value for the reduction.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
VPExpansionDetails expandVectorPredicationIntrinsic(VPIntrinsic &VPI, const TargetTransformInfo &TTI)
Expand a vector predication intrinsic.
VPExpansionDetails
Represents the details of the expansion of a VP intrinsic.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.