LLVM 22.0.0git
ExpandReductions.cpp
Go to the documentation of this file.
1//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements IR expansion for reduction intrinsics, allowing targets
10// to enable the intrinsics until just before codegen.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/CodeGen/Passes.h"
17#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/Intrinsics.h"
22#include "llvm/Pass.h"
24
25using namespace llvm;
26
27namespace {
28
// Expands vector reduction intrinsic calls in F into explicit IR.
//
// Works in two phases:
//   1. Walk every instruction in F and collect the vector_reduce_* intrinsic
//      calls for which TTI->shouldExpandReduction() returns true.
//   2. For each collected call, build a replacement value just before the
//      call, redirect all uses to it, and erase the call.
//
// Collected calls that cannot be expanded here (see the `continue` statements
// below) are left in place untouched.
//
// Returns true if at least one intrinsic call was replaced.
29bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
30 bool Changed = false;
// NOTE(review): the declaration of `Worklist` is not visible in this listing
// (the embedded numbering skips a line here); it is filled with push_back()
// below and iterated in the second loop.
32 for (auto &I : instructions(F)) {
33 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
34 switch (II->getIntrinsicID()) {
35 default: break;
36 case Intrinsic::vector_reduce_fadd:
37 case Intrinsic::vector_reduce_fmul:
38 case Intrinsic::vector_reduce_add:
39 case Intrinsic::vector_reduce_mul:
40 case Intrinsic::vector_reduce_and:
41 case Intrinsic::vector_reduce_or:
42 case Intrinsic::vector_reduce_xor:
43 case Intrinsic::vector_reduce_smax:
44 case Intrinsic::vector_reduce_smin:
45 case Intrinsic::vector_reduce_umax:
46 case Intrinsic::vector_reduce_umin:
47 case Intrinsic::vector_reduce_fmax:
48 case Intrinsic::vector_reduce_fmin:
// Only queue the call when the target asks for it to be expanded.
49 if (TTI->shouldExpandReduction(II))
50 Worklist.push_back(II);
51
52 break;
53 }
54 }
55 }
56
// Calls are rewritten only after the scan above has finished, so no
// instruction is created or erased while instructions(F) is being iterated.
57 for (auto *II : Worklist) {
// Non-FP reductions get an empty (default-constructed) flag set.
58 FastMathFlags FMF =
59 isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
60 Intrinsic::ID ID = II->getIntrinsicID();
// NOTE(review): the declarations of `RK` and `RS` (both used below) are not
// visible in this listing; the next line looks like the tail of the
// statement that initializes `RS` -- confirm against the full file.
63 TTI->getPreferredExpandedReductionShuffle(II);
64
// Rdx receives the replacement value; new instructions are inserted
// immediately before the intrinsic call.
65 Value *Rdx = nullptr;
66 IRBuilder<> Builder(II);
// The guard object is scoped to this loop iteration, around the builder's
// fast-math flags being set from the call's flags.
67 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
68 Builder.setFastMathFlags(FMF);
69 switch (ID) {
70 default: llvm_unreachable("Unexpected intrinsic!");
71 case Intrinsic::vector_reduce_fadd:
72 case Intrinsic::vector_reduce_fmul: {
73 // FMFs must be attached to the call, otherwise it's an ordered reduction
74 // and it can't be handled by generating a shuffle sequence.
// Operand 0 is the start/accumulator value, operand 1 the vector to reduce.
75 Value *Acc = II->getArgOperand(0);
76 Value *Vec = II->getArgOperand(1);
77 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
78 if (!FMF.allowReassoc())
79 Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);
80 else {
// The shuffle path is only taken for power-of-two element counts; other
// sizes are skipped and the intrinsic call is left as is.
81 if (!isPowerOf2_32(
82 cast<FixedVectorType>(Vec->getType())->getNumElements()))
83 continue;
84 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
// Fold the accumulator into the reduced vector value.
85 Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
86 "bin.rdx");
87 }
88 break;
89 }
90 case Intrinsic::vector_reduce_and:
91 case Intrinsic::vector_reduce_or: {
92 // Canonicalize logical or/and reductions:
93 // Or reduction for i1 is represented as:
94 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
95 // %res = cmp ne iReduxWidth %val, 0
96 // And reduction for i1 is represented as:
97 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
98 // %res = cmp eq iReduxWidth %val, 11111
99 Value *Vec = II->getArgOperand(0);
100 auto *FTy = cast<FixedVectorType>(Vec->getType());
101 unsigned NumElts = FTy->getNumElements();
// Non-power-of-two vectors are skipped, including the i1 case below.
102 if (!isPowerOf2_32(NumElts))
103 continue;
104
// i1 vectors: bitcast to an integer of NumElts bits and use one compare
// instead of a shuffle sequence.
105 if (FTy->getElementType() == Builder.getInt1Ty()) {
106 Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
107 if (ID == Intrinsic::vector_reduce_and) {
108 Rdx = Builder.CreateICmpEQ(
// NOTE(review): the arguments of the CreateICmpEQ call are not visible in
// this listing; per the comment above, the bitcast value is presumably
// compared against an all-ones constant -- confirm against the full file.
110 } else {
111 assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
112 Rdx = Builder.CreateIsNotNull(Rdx);
113 }
114 break;
115 }
// Wider element types go through the generic shuffle reduction.
116 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
117 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
118 break;
119 }
120 case Intrinsic::vector_reduce_add:
121 case Intrinsic::vector_reduce_mul:
122 case Intrinsic::vector_reduce_xor:
123 case Intrinsic::vector_reduce_smax:
124 case Intrinsic::vector_reduce_smin:
125 case Intrinsic::vector_reduce_umax:
126 case Intrinsic::vector_reduce_umin: {
127 Value *Vec = II->getArgOperand(0);
// Same power-of-two restriction as above; otherwise leave the call alone.
128 if (!isPowerOf2_32(
129 cast<FixedVectorType>(Vec->getType())->getNumElements()))
130 continue;
131 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
132 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
133 break;
134 }
135 case Intrinsic::vector_reduce_fmax:
136 case Intrinsic::vector_reduce_fmin: {
137 // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
138 // semantics of the reduction.
139 Value *Vec = II->getArgOperand(0);
// Skipped unless the element count is a power of two AND nnan is set.
140 if (!isPowerOf2_32(
141 cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
142 !FMF.noNaNs())
143 continue;
144 unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
145 Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
146 break;
147 }
148 }
// Reached only when a replacement was built (skipped cases `continue` past
// this point): swap it in for the intrinsic call and delete the call.
149 II->replaceAllUsesWith(Rdx);
150 II->eraseFromParent();
151 Changed = true;
152 }
153 return Changed;
154}
155
// FunctionPass wrapper that runs expandReductions() on each function, using
// the TargetTransformInfo obtained from TargetTransformInfoWrapperPass.
156class ExpandReductions : public FunctionPass {
157public:
// Pass identifier handed to the FunctionPass base-class constructor.
158 static char ID;
// NOTE(review): the constructor body is not visible in this listing (the
// embedded numbering skips a line here) -- confirm against the full file.
159 ExpandReductions() : FunctionPass(ID) {
161 }
162
// Looks up the TTI for F and forwards to expandReductions(); the returned
// bool is whatever expandReductions() reports.
163 bool runOnFunction(Function &F) override {
164 const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
165 return expandReductions(F, TTI);
166 }
167
// Declares the TTI wrapper pass as a required analysis and marks the CFG as
// preserved by this pass.
168 void getAnalysisUsage(AnalysisUsage &AU) const override {
169 AU.addRequired<TargetTransformInfoWrapperPass>();
170 AU.setPreservesCFG();
171 }
172};
173}
174
// Out-of-class definition of the pass ID declared in ExpandReductions.
175char ExpandReductions::ID;
// Registers the pass under the command-line name "expand-reductions" with
// the description "Expand reduction intrinsics"; the two trailing `false`
// arguments fill the macro's `cfg` and `analysis` parameters.
176INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
177 "Expand reduction intrinsics", false, false)
// NOTE(review): one line between the BEGIN and END macros is not visible in
// this listing; presumably an INITIALIZE_PASS_DEPENDENCY entry -- confirm
// against the full file.
179INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
180 "Expand reduction intrinsics", false, false)
181
// NOTE(review): the signature line of this function is not visible in this
// listing. The body returns a newly allocated ExpandReductions pass object.
183 return new ExpandReductions();
184}
185
// NOTE(review): the signature line of this function and the declaration of
// `PA` are not visible in this listing -- confirm against the full file.
// Fetches the TargetTransformInfo for F from the analysis manager and runs
// expandReductions() with it.
188 const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
// Nothing was rewritten: report every analysis as preserved.
189 if (!expandReductions(F, &TTI))
190 return PreservedAnalyses::all();
// Something was rewritten: return the preserved set held in PA.
193 return PA;
194}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
static bool runOnFunction(Function &F, bool PostInlining)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
bool noNaNs() const
Definition FMF.h:65
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
LLVM_ABI void initializeExpandReductionsPass(PassRegistry &)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
TargetTransformInfo TTI
RecurKind
These are the kinds of recurrences that we support.
LLVM_ABI FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.