33 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
34 switch (
II->getIntrinsicID()) {
36 case Intrinsic::vector_reduce_fadd:
37 case Intrinsic::vector_reduce_fmul:
38 case Intrinsic::vector_reduce_add:
39 case Intrinsic::vector_reduce_mul:
40 case Intrinsic::vector_reduce_and:
41 case Intrinsic::vector_reduce_or:
42 case Intrinsic::vector_reduce_xor:
43 case Intrinsic::vector_reduce_smax:
44 case Intrinsic::vector_reduce_smin:
45 case Intrinsic::vector_reduce_umax:
46 case Intrinsic::vector_reduce_umin:
47 case Intrinsic::vector_reduce_fmax:
48 case Intrinsic::vector_reduce_fmin:
57 for (
auto *
II : Worklist) {
68 Builder.setFastMathFlags(FMF);
71 case Intrinsic::vector_reduce_fadd:
72 case Intrinsic::vector_reduce_fmul: {
75 Value *Acc =
II->getArgOperand(0);
76 Value *Vec =
II->getArgOperand(1);
82 cast<FixedVectorType>(Vec->
getType())->getNumElements()))
90 case Intrinsic::vector_reduce_and:
91 case Intrinsic::vector_reduce_or: {
99 Value *Vec =
II->getArgOperand(0);
100 auto *FTy = cast<FixedVectorType>(Vec->
getType());
101 unsigned NumElts = FTy->getNumElements();
105 if (FTy->getElementType() == Builder.getInt1Ty()) {
106 Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
107 if (
ID == Intrinsic::vector_reduce_and) {
108 Rdx = Builder.CreateICmpEQ(
109 Rdx, ConstantInt::getAllOnesValue(Rdx->
getType()));
111 assert(
ID == Intrinsic::vector_reduce_or &&
"Expected or reduction.");
112 Rdx = Builder.CreateIsNotNull(Rdx);
120 case Intrinsic::vector_reduce_add:
121 case Intrinsic::vector_reduce_mul:
122 case Intrinsic::vector_reduce_xor:
123 case Intrinsic::vector_reduce_smax:
124 case Intrinsic::vector_reduce_smin:
125 case Intrinsic::vector_reduce_umax:
126 case Intrinsic::vector_reduce_umin: {
127 Value *Vec =
II->getArgOperand(0);
129 cast<FixedVectorType>(Vec->
getType())->getNumElements()))
135 case Intrinsic::vector_reduce_fmax:
136 case Intrinsic::vector_reduce_fmin: {
139 Value *Vec =
II->getArgOperand(0);
141 cast<FixedVectorType>(Vec->
getType())->getNumElements()) ||
149 II->replaceAllUsesWith(Rdx);
150 II->eraseFromParent();
164 const auto *
TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
165 return expandReductions(
F,
TTI);
175char ExpandReductions::ID;
177 "Expand reduction intrinsics",
false,
false)
183 return new ExpandReductions();
189 if (!expandReductions(
F, &
TTI))
Expand Atomic instructions
expand Expand reduction intrinsics
static Expected< BitVector > expand(StringRef S, StringRef Original)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Straight line strength reduction
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowReassoc() const
Flag queries.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void initializeExpandReductionsPass(PassRegistry &)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
RecurKind
These are the kinds of recurrences that we support.
FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.