LLVM 19.0.0git
ExpandReductions.cpp
Go to the documentation of this file.
1//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements IR expansion for reduction intrinsics, allowing targets
10// to enable the intrinsics until just before codegen.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/CodeGen/Passes.h"
17#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/Intrinsics.h"
22#include "llvm/Pass.h"
24
25using namespace llvm;
26
27namespace {
28
29bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
30 bool Changed = false;
32 for (auto &I : instructions(F)) {
33 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
34 switch (II->getIntrinsicID()) {
35 default: break;
36 case Intrinsic::vector_reduce_fadd:
37 case Intrinsic::vector_reduce_fmul:
38 case Intrinsic::vector_reduce_add:
39 case Intrinsic::vector_reduce_mul:
40 case Intrinsic::vector_reduce_and:
41 case Intrinsic::vector_reduce_or:
42 case Intrinsic::vector_reduce_xor:
43 case Intrinsic::vector_reduce_smax:
44 case Intrinsic::vector_reduce_smin:
45 case Intrinsic::vector_reduce_umax:
46 case Intrinsic::vector_reduce_umin:
47 case Intrinsic::vector_reduce_fmax:
48 case Intrinsic::vector_reduce_fmin:
50 Worklist.push_back(II);
51
52 break;
53 }
54 }
55 }
56
57 for (auto *II : Worklist) {
58 FastMathFlags FMF =
59 isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
60 Intrinsic::ID ID = II->getIntrinsicID();
62
63 Value *Rdx = nullptr;
64 IRBuilder<> Builder(II);
65 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
66 Builder.setFastMathFlags(FMF);
67 switch (ID) {
68 default: llvm_unreachable("Unexpected intrinsic!");
69 case Intrinsic::vector_reduce_fadd:
70 case Intrinsic::vector_reduce_fmul: {
71 // FMFs must be attached to the call, otherwise it's an ordered reduction
72 // and it can't be handled by generating a shuffle sequence.
73 Value *Acc = II->getArgOperand(0);
74 Value *Vec = II->getArgOperand(1);
75 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
76 if (!FMF.allowReassoc())
77 Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);
78 else {
79 if (!isPowerOf2_32(
80 cast<FixedVectorType>(Vec->getType())->getNumElements()))
81 continue;
82 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
83 Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
84 "bin.rdx");
85 }
86 break;
87 }
88 case Intrinsic::vector_reduce_and:
89 case Intrinsic::vector_reduce_or: {
90 // Canonicalize logical or/and reductions:
91 // Or reduction for i1 is represented as:
92 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
93 // %res = cmp ne iReduxWidth %val, 0
94 // And reduction for i1 is represented as:
95 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
96 // %res = cmp eq iReduxWidth %val, 11111
97 Value *Vec = II->getArgOperand(0);
98 auto *FTy = cast<FixedVectorType>(Vec->getType());
99 unsigned NumElts = FTy->getNumElements();
100 if (!isPowerOf2_32(NumElts))
101 continue;
102
103 if (FTy->getElementType() == Builder.getInt1Ty()) {
104 Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
105 if (ID == Intrinsic::vector_reduce_and) {
106 Rdx = Builder.CreateICmpEQ(
107 Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
108 } else {
109 assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
110 Rdx = Builder.CreateIsNotNull(Rdx);
111 }
112 break;
113 }
114 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
115 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
116 break;
117 }
118 case Intrinsic::vector_reduce_add:
119 case Intrinsic::vector_reduce_mul:
120 case Intrinsic::vector_reduce_xor:
121 case Intrinsic::vector_reduce_smax:
122 case Intrinsic::vector_reduce_smin:
123 case Intrinsic::vector_reduce_umax:
124 case Intrinsic::vector_reduce_umin: {
125 Value *Vec = II->getArgOperand(0);
126 if (!isPowerOf2_32(
127 cast<FixedVectorType>(Vec->getType())->getNumElements()))
128 continue;
129 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
130 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
131 break;
132 }
133 case Intrinsic::vector_reduce_fmax:
134 case Intrinsic::vector_reduce_fmin: {
135 // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
136 // semantics of the reduction.
137 Value *Vec = II->getArgOperand(0);
138 if (!isPowerOf2_32(
139 cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
140 !FMF.noNaNs())
141 continue;
142 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
143 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
144 break;
145 }
146 }
147 II->replaceAllUsesWith(Rdx);
148 II->eraseFromParent();
149 Changed = true;
150 }
151 return Changed;
152}
153
154class ExpandReductions : public FunctionPass {
155public:
156 static char ID;
157 ExpandReductions() : FunctionPass(ID) {
159 }
160
161 bool runOnFunction(Function &F) override {
162 const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
163 return expandReductions(F, TTI);
164 }
165
166 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 AU.setPreservesCFG();
169 }
170};
171}
172
173char ExpandReductions::ID;
174INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
175 "Expand reduction intrinsics", false, false)
179
181 return new ExpandReductions();
182}
183
186 const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
187 if (!expandReductions(F, &TTI))
188 return PreservedAnalyses::all();
191 return PA;
192}
Expand Atomic instructions
expand Expand reduction intrinsics
expand reductions
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:21
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Straight line strength reduction
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool noNaNs() const
Definition: FMF.h:66
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldExpandReduction(const IntrinsicInst *II) const
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Definition: LoopUtils.cpp:920
void initializeExpandReductionsPass(PassRegistry &)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:1087
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurence kind used when expanding a min/max reduction.
Definition: LoopUtils.cpp:995
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:1062