33 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
34 switch (II->getIntrinsicID()) {
36 case Intrinsic::vector_reduce_fadd:
37 case Intrinsic::vector_reduce_fmul:
38 case Intrinsic::vector_reduce_add:
39 case Intrinsic::vector_reduce_mul:
40 case Intrinsic::vector_reduce_and:
41 case Intrinsic::vector_reduce_or:
42 case Intrinsic::vector_reduce_xor:
43 case Intrinsic::vector_reduce_smax:
44 case Intrinsic::vector_reduce_smin:
45 case Intrinsic::vector_reduce_umax:
46 case Intrinsic::vector_reduce_umin:
47 case Intrinsic::vector_reduce_fmax:
48 case Intrinsic::vector_reduce_fmin:
57 for (
auto *II : Worklist) {
59 isa<FPMathOperator>(II) ? II->getFastMathFlags() :
FastMathFlags{};
66 Builder.setFastMathFlags(FMF);
69 case Intrinsic::vector_reduce_fadd:
70 case Intrinsic::vector_reduce_fmul: {
73 Value *Acc = II->getArgOperand(0);
74 Value *Vec = II->getArgOperand(1);
80 cast<FixedVectorType>(Vec->
getType())->getNumElements()))
88 case Intrinsic::vector_reduce_and:
89 case Intrinsic::vector_reduce_or: {
97 Value *Vec = II->getArgOperand(0);
98 auto *FTy = cast<FixedVectorType>(Vec->
getType());
99 unsigned NumElts = FTy->getNumElements();
103 if (FTy->getElementType() == Builder.getInt1Ty()) {
104 Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
105 if (
ID == Intrinsic::vector_reduce_and) {
106 Rdx = Builder.CreateICmpEQ(
107 Rdx, ConstantInt::getAllOnesValue(Rdx->
getType()));
109 assert(
ID == Intrinsic::vector_reduce_or &&
"Expected or reduction.");
110 Rdx = Builder.CreateIsNotNull(Rdx);
118 case Intrinsic::vector_reduce_add:
119 case Intrinsic::vector_reduce_mul:
120 case Intrinsic::vector_reduce_xor:
121 case Intrinsic::vector_reduce_smax:
122 case Intrinsic::vector_reduce_smin:
123 case Intrinsic::vector_reduce_umax:
124 case Intrinsic::vector_reduce_umin: {
125 Value *Vec = II->getArgOperand(0);
127 cast<FixedVectorType>(Vec->
getType())->getNumElements()))
133 case Intrinsic::vector_reduce_fmax:
134 case Intrinsic::vector_reduce_fmin: {
137 Value *Vec = II->getArgOperand(0);
139 cast<FixedVectorType>(Vec->
getType())->getNumElements()) ||
147 II->replaceAllUsesWith(Rdx);
148 II->eraseFromParent();
162 const auto *
TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
163 return expandReductions(
F,
TTI);
173char ExpandReductions::ID;
175 "Expand reduction intrinsics",
false,
false)
181 return new ExpandReductions();
187 if (!expandReductions(
F, &
TTI))
Expand Atomic instructions
expand Expand reduction intrinsics
static Expected< BitVector > expand(StringRef S, StringRef Original)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Straight line strength reduction
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowReassoc() const
Flag queries.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void initializeExpandReductionsPass(PassRegistry &)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
RecurKind
These are the kinds of recurrences that we support.
FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.