37 switch (
II->getIntrinsicID()) {
39 case Intrinsic::vector_reduce_fadd:
40 case Intrinsic::vector_reduce_fmul:
41 case Intrinsic::vector_reduce_add:
42 case Intrinsic::vector_reduce_mul:
43 case Intrinsic::vector_reduce_and:
44 case Intrinsic::vector_reduce_or:
45 case Intrinsic::vector_reduce_xor:
46 case Intrinsic::vector_reduce_smax:
47 case Intrinsic::vector_reduce_smin:
48 case Intrinsic::vector_reduce_umax:
49 case Intrinsic::vector_reduce_umin:
50 case Intrinsic::vector_reduce_fmax:
51 case Intrinsic::vector_reduce_fmin:
52 if (
TTI->shouldExpandReduction(
II))
60 for (
auto *
II : Worklist) {
66 TTI->getPreferredExpandedReductionShuffle(
II);
71 Builder.setFastMathFlags(FMF);
74 case Intrinsic::vector_reduce_fadd:
75 case Intrinsic::vector_reduce_fmul: {
78 Value *Acc =
II->getArgOperand(0);
79 Value *Vec =
II->getArgOperand(1);
97 case Intrinsic::vector_reduce_and:
98 case Intrinsic::vector_reduce_or: {
106 Value *Vec =
II->getArgOperand(0);
108 unsigned NumElts = FTy->getNumElements();
112 if (FTy->getElementType() == Builder.getInt1Ty()) {
113 Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
114 if (
ID == Intrinsic::vector_reduce_and) {
115 Rdx = Builder.CreateICmpEQ(
118 assert(
ID == Intrinsic::vector_reduce_or &&
"Expected or reduction.");
119 Rdx = Builder.CreateIsNotNull(Rdx);
127 case Intrinsic::vector_reduce_add:
128 case Intrinsic::vector_reduce_mul:
129 case Intrinsic::vector_reduce_xor:
130 case Intrinsic::vector_reduce_smax:
131 case Intrinsic::vector_reduce_smin:
132 case Intrinsic::vector_reduce_umax:
133 case Intrinsic::vector_reduce_umin: {
134 Value *Vec =
II->getArgOperand(0);
148 case Intrinsic::vector_reduce_fmax:
149 case Intrinsic::vector_reduce_fmin: {
152 Value *Vec =
II->getArgOperand(0);
162 II->replaceAllUsesWith(Rdx);
163 II->eraseFromParent();
172 ExpandReductions() : FunctionPass(ID) {}
175 const auto *
TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
176 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
177 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
178 auto *DT = DTWP ? &DTWP->getDomTree() :
nullptr;
179 auto *LI = LIWP ? &LIWP->getLoopInfo() :
nullptr;
180 return expandReductions(
F,
TTI, DT, LI);
183 void getAnalysisUsage(AnalysisUsage &AU)
const override {
191char ExpandReductions::ID;
193 "Expand reduction intrinsics",
false,
false)
199 return new ExpandReductions();
207 if (!expandReductions(
F, &
TTI, DT, LI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
static bool runOnFunction(Function &F, bool PostInlining)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowReassoc() const
Flag queries.
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Analysis pass that exposes the LoopInfo for a function.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Value * getReductionIdentity(Intrinsic::ID RdxID, Type *Ty, FastMathFlags FMF)
Given information about an @llvm.vector.reduce.* intrinsic, return the identity value for the reduction.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
RecurKind
These are the kinds of recurrences that we support.
LLVM_ABI FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
LLVM_ABI Value * expandReductionViaLoop(IRBuilderBase &Builder, Value *Vec, unsigned RdxOpcode, Value *Acc, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Expand a scalable vector reduction into a runtime loop that applies RdxOpcode element by element,...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.