Go to the documentation of this file.
38 #define VPINTERNAL_VPLEGAL_CASES \
39 VPINTERNAL_CASE(Legal) \
40 VPINTERNAL_CASE(Discard) \
41 VPINTERNAL_CASE(Convert)
43 #define VPINTERNAL_CASE(X) "|" #X
49 ". If non-empty, ignore "
50 "TargetTransformInfo and "
51 "always use this transformation for the %evl parameter (Used in "
57 ". If non-empty, Ignore "
58 "TargetTransformInfo and "
59 "always use this transformation for the %mask parameter (Used in "
62 #undef VPINTERNAL_CASE
63 #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
69 #undef VPINTERNAL_VPLEGAL_CASES
76 #define DEBUG_TYPE "expandvp"
78 STATISTIC(NumFoldedVL,
"Number of folded vector length params");
79 STATISTIC(NumLoweredVPOps,
"Number of folded vector predication operations");
85 auto *ConstVec = dyn_cast<ConstantVector>(MaskVal);
86 return ConstVec && ConstVec->isAllOnesValue();
97 auto *NewInst = dyn_cast<Instruction>(&NewVal);
98 if (!NewInst || !isa<FPMathOperator>(NewVal))
101 auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
105 NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
118 if (isa<VPReductionIntrinsic>(VPI))
124 cast<Operator>(&VPI));
132 struct CachingVPExpander {
182 bool UsingTTIOverrides;
188 bool expandVectorPredication();
198 for (
unsigned Idx = 0; Idx < NumElems; ++Idx)
210 auto *
M =
Builder.GetInsertBlock()->getModule();
213 M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->
getType()});
216 return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
222 Value *VLSplat =
Builder.CreateVectorSplat(NumElems, EVLParam);
223 Value *IdxVec = createStepVector(
Builder, LaneTy, NumElems);
231 "Implicitly dropping %evl in non-speculatable operator!");
248 case Instruction::UDiv:
249 case Instruction::SDiv:
250 case Instruction::URem:
251 case Instruction::SRem:
254 Op1 =
Builder.CreateSelect(
Mask, Op1, SafeDivisor);
266 bool Negative =
false;
271 case Intrinsic::vp_reduce_add:
272 case Intrinsic::vp_reduce_or:
273 case Intrinsic::vp_reduce_xor:
274 case Intrinsic::vp_reduce_umax:
276 case Intrinsic::vp_reduce_mul:
278 case Intrinsic::vp_reduce_and:
279 case Intrinsic::vp_reduce_umin:
281 case Intrinsic::vp_reduce_smin:
284 case Intrinsic::vp_reduce_smax:
287 case Intrinsic::vp_reduce_fmax:
290 case Intrinsic::vp_reduce_fmin: {
297 APFloat::getLargest(Semantics, Negative));
299 case Intrinsic::vp_reduce_fadd:
301 case Intrinsic::vp_reduce_fmul:
310 "Implicitly dropping %evl in non-speculatable operator!");
317 auto *NeutralElt = getNeutralReductionElement(VPI, VPI.
getType());
318 auto *NeutralVector =
Builder.CreateVectorSplat(
319 cast<VectorType>(RedOp->
getType())->getElementCount(), NeutralElt);
320 RedOp =
Builder.CreateSelect(
Mask, RedOp, NeutralVector);
329 case Intrinsic::vp_reduce_add:
333 case Intrinsic::vp_reduce_mul:
337 case Intrinsic::vp_reduce_and:
341 case Intrinsic::vp_reduce_or:
345 case Intrinsic::vp_reduce_xor:
349 case Intrinsic::vp_reduce_smax:
354 case Intrinsic::vp_reduce_smin:
359 case Intrinsic::vp_reduce_umax:
364 case Intrinsic::vp_reduce_umin:
369 case Intrinsic::vp_reduce_fmax:
375 case Intrinsic::vp_reduce_fmin:
381 case Intrinsic::vp_reduce_fadd:
384 case Intrinsic::vp_reduce_fmul:
393 void CachingVPExpander::discardEVLParameter(
VPIntrinsic &VPI) {
404 Value *MaxEVL =
nullptr;
413 Value *VScale =
Builder.CreateCall(VScaleFunc, {},
"vscale");
414 MaxEVL =
Builder.CreateMul(VScale, FactorConst,
"scalable_size",
434 assert(OldMaskParam &&
"no mask param to fold the vl param into");
435 assert(OldEVLParam &&
"no EVL param to fold away");
442 Value *VLMask = convertEVLToMask(
Builder, OldEVLParam, ElemCount);
443 Value *NewMaskParam =
Builder.CreateAnd(VLMask, OldMaskParam);
447 discardEVLParameter(VPI);
449 "transformation did not render the evl param ineffective!");
456 LLVM_DEBUG(
dbgs() <<
"Lowering to unpredicated op: " << VPI <<
'\n');
464 return expandPredicationInBinaryOperator(
Builder, VPI);
466 if (
auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
467 return expandPredicationInReduction(
Builder, *VPRI);
474 struct TransformJob {
478 : PI(PI), Strategy(InitStrat) {}
505 CachingVPExpander::getVPLegalizationStrategy(
const VPIntrinsic &VPI)
const {
520 bool CachingVPExpander::expandVectorPredication() {
526 auto *VPI = dyn_cast<VPIntrinsic>(&
I);
529 auto VPStrat = getVPLegalizationStrategy(*VPI);
530 sanitizeStrategy(*VPI, VPStrat);
531 if (!VPStrat.shouldDoNothing())
534 if (Worklist.empty())
539 <<
" instructions ::::\n");
540 for (TransformJob Job : Worklist) {
542 switch (Job.Strategy.EVLParamStrategy) {
546 discardEVLParameter(*Job.PI);
549 if (foldEVLIntoMask(*Job.PI))
556 switch (Job.Strategy.OpStrategy) {
562 expandPredication(*Job.PI);
568 assert(Job.isDone() &&
"incomplete transformation");
581 const auto *
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
582 CachingVPExpander VPExpander(
F, *
TTI);
583 return VPExpander.expandVectorPredication();
595 "Expand vector predication intrinsics",
false,
false)
602 return new ExpandVectorPredication();
608 CachingVPExpander VPExpander(
F,
TTI);
609 if (!VPExpander.expandVectorPredication())
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI)
Transfer operation properties from VPI to NewVal.
A set of analyses that are preserved following a run of a transformation pass.
Analysis pass providing the TargetTransformInfo.
void initializeExpandVectorPredicationPass(PassRegistry &)
This is an optimization pass for GlobalISel generic memory operations.
We currently emit: eax. Perhaps this is what we really should generate? Is imull three or four cycles? eax eax. The current instruction priority is based on pattern complexity. The former is more complex because it folds a load, so the latter will not be emitted. Perhaps we should use AddedComplexity to give LEA32r a higher priority? We should always try to match LEA first, since the LEA matching code does some estimate to determine whether the match is profitable. If we care more about code, then imull is better. It's two bytes shorter than movl leal. On a Pentium M
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const fltSemantics & getFltSemantics() const
void setVectorLengthParam(Value *)
bool canIgnoreVectorLengthParam() const
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
The instances of the Type class are immutable: once they are created, they are never changed.
static VPTransform parseOverrideOption(const std::string &TextOpt)
Convenience struct for specifying and reasoning about fast-math flags.
static Constant * getNegativeZero(Type *Ty)
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
static IntegerType * getInt32Ty(LLVMContext &C)
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
void setMaskParam(Value *)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Expand vector predication intrinsics
This represents vector predication reduction intrinsics.
Represent the analysis usage information of a pass.
static Constant * getAllOnesValue(Type *Ty)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Legacy analysis pass which computes a DominatorTree.
STATISTIC(NumFunctions, "Total number of functions")
ConstantFP - Floating Point Values [float, double].
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Operator *Inst, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Value * getVectorLengthParam() const
Optional< unsigned > getFunctionalOpcode() const
inst_range instructions(Function *F)
static Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
ElementCount getStaticVectorLength() const
This is an important base class in LLVM.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static cl::opt< std::string > EVLTransformOverride("expandvp-override-evl-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, ignore " "TargetTransformInfo and " "always use this transformation for the %evl parameter (Used in " "testing)."))
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp)
Transfer all properties from OldOp to NewOp and replace all uses.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
initializer< Ty > init(const Ty &Val)
static bool anyExpandVPOverridesSet()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
FunctionPass * createExpandVectorPredicationPass()
This pass expands the vector predication intrinsics into unpredicated instructions with selects or ju...
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
@ ICMP_ULT
unsigned less than
ScalarTy getFixedValue() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
Represents analyses that only rely on functions' control flow.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
self_iterator getIterator()
static Constant * get(ArrayRef< Constant * > V)
unsigned getStartParamPos() const
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
StringRef getName() const
Return a constant reference to the value's name.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static cl::opt< std::string > MaskTransformOverride("expandvp-override-mask-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, Ignore " "TargetTransformInfo and " "always use this transformation for the %mask parameter (Used in " "testing)."))
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp", "Expand vector predication intrinsics", false, false) INITIALIZE_PASS_END(ExpandVectorPredication
static bool runOnFunction(Function &F, bool PostInlining)
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Should compile to something r4 addze r3 instead we get
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This is the common base class for vector predication intrinsics.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
TargetTransformInfo::VPLegalization VPLegalization
unsigned getVectorParamPos() const
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
static bool maySpeculateLanes(VPIntrinsic &VPI)
#define VPINTERNAL_VPLEGAL_CASES
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
void preserveSet()
Mark an analysis set as preserved.
const BasicBlock * getParent() const
static Constant * getSafeDivisor(Type *DivTy)
#define LLVM_LIKELY(EXPR)
A switch()-like statement whose cases are string literals.
A container for analyses that lazily runs them and caches their results.
FunctionPass class - This class is used to implement most global optimizations.
Value * getMaskParam() const
AnalysisUsage & addRequired()
Value * getOperand(unsigned i) const
static bool isAllTrueMask(Value *MaskVal)
loop Loop Strength Reduction
constexpr T value_or(U &&alt) const &
LLVM Value Representation.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
reference emplace_back(ArgTypes &&... Args)