LLVM  13.0.0git
ExpandReductions.cpp
Go to the documentation of this file.
1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the intrinsics until just before codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
26 
27 using namespace llvm;
28 
29 namespace {
30 
31 unsigned getOpcode(Intrinsic::ID ID) {
32  switch (ID) {
33  case Intrinsic::vector_reduce_fadd:
34  return Instruction::FAdd;
35  case Intrinsic::vector_reduce_fmul:
36  return Instruction::FMul;
37  case Intrinsic::vector_reduce_add:
38  return Instruction::Add;
39  case Intrinsic::vector_reduce_mul:
40  return Instruction::Mul;
41  case Intrinsic::vector_reduce_and:
42  return Instruction::And;
43  case Intrinsic::vector_reduce_or:
44  return Instruction::Or;
45  case Intrinsic::vector_reduce_xor:
46  return Instruction::Xor;
47  case Intrinsic::vector_reduce_smax:
48  case Intrinsic::vector_reduce_smin:
49  case Intrinsic::vector_reduce_umax:
50  case Intrinsic::vector_reduce_umin:
51  return Instruction::ICmp;
52  case Intrinsic::vector_reduce_fmax:
53  case Intrinsic::vector_reduce_fmin:
54  return Instruction::FCmp;
55  default:
56  llvm_unreachable("Unexpected ID");
57  }
58 }
59 
60 RecurKind getRK(Intrinsic::ID ID) {
61  switch (ID) {
62  case Intrinsic::vector_reduce_smax:
63  return RecurKind::SMax;
64  case Intrinsic::vector_reduce_smin:
65  return RecurKind::SMin;
66  case Intrinsic::vector_reduce_umax:
67  return RecurKind::UMax;
68  case Intrinsic::vector_reduce_umin:
69  return RecurKind::UMin;
70  case Intrinsic::vector_reduce_fmax:
71  return RecurKind::FMax;
72  case Intrinsic::vector_reduce_fmin:
73  return RecurKind::FMin;
74  default:
75  return RecurKind::None;
76  }
77 }
78 
79 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
80  bool Changed = false;
82  for (auto &I : instructions(F)) {
83  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
84  switch (II->getIntrinsicID()) {
85  default: break;
86  case Intrinsic::vector_reduce_fadd:
87  case Intrinsic::vector_reduce_fmul:
88  case Intrinsic::vector_reduce_add:
89  case Intrinsic::vector_reduce_mul:
90  case Intrinsic::vector_reduce_and:
91  case Intrinsic::vector_reduce_or:
92  case Intrinsic::vector_reduce_xor:
93  case Intrinsic::vector_reduce_smax:
94  case Intrinsic::vector_reduce_smin:
95  case Intrinsic::vector_reduce_umax:
96  case Intrinsic::vector_reduce_umin:
97  case Intrinsic::vector_reduce_fmax:
98  case Intrinsic::vector_reduce_fmin:
99  if (TTI->shouldExpandReduction(II))
100  Worklist.push_back(II);
101 
102  break;
103  }
104  }
105  }
106 
107  for (auto *II : Worklist) {
108  FastMathFlags FMF =
109  isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
110  Intrinsic::ID ID = II->getIntrinsicID();
111  RecurKind RK = getRK(ID);
112 
113  Value *Rdx = nullptr;
114  IRBuilder<> Builder(II);
116  Builder.setFastMathFlags(FMF);
117  switch (ID) {
118  default: llvm_unreachable("Unexpected intrinsic!");
119  case Intrinsic::vector_reduce_fadd:
120  case Intrinsic::vector_reduce_fmul: {
121  // FMFs must be attached to the call, otherwise it's an ordered reduction
122  // and it can't be handled by generating a shuffle sequence.
123  Value *Acc = II->getArgOperand(0);
124  Value *Vec = II->getArgOperand(1);
125  if (!FMF.allowReassoc())
126  Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
127  else {
128  if (!isPowerOf2_32(
129  cast<FixedVectorType>(Vec->getType())->getNumElements()))
130  continue;
131 
132  Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
133  Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
134  Acc, Rdx, "bin.rdx");
135  }
136  break;
137  }
138  case Intrinsic::vector_reduce_add:
139  case Intrinsic::vector_reduce_mul:
140  case Intrinsic::vector_reduce_and:
141  case Intrinsic::vector_reduce_or:
142  case Intrinsic::vector_reduce_xor:
143  case Intrinsic::vector_reduce_smax:
144  case Intrinsic::vector_reduce_smin:
145  case Intrinsic::vector_reduce_umax:
146  case Intrinsic::vector_reduce_umin: {
147  Value *Vec = II->getArgOperand(0);
148  if (!isPowerOf2_32(
149  cast<FixedVectorType>(Vec->getType())->getNumElements()))
150  continue;
151 
152  Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
153  break;
154  }
155  case Intrinsic::vector_reduce_fmax:
156  case Intrinsic::vector_reduce_fmin: {
157  // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
158  // semantics of the reduction.
159  Value *Vec = II->getArgOperand(0);
160  if (!isPowerOf2_32(
161  cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
162  !FMF.noNaNs())
163  continue;
164 
165  Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
166  break;
167  }
168  }
169  II->replaceAllUsesWith(Rdx);
170  II->eraseFromParent();
171  Changed = true;
172  }
173  return Changed;
174 }
175 
176 class ExpandReductions : public FunctionPass {
177 public:
178  static char ID;
179  ExpandReductions() : FunctionPass(ID) {
181  }
182 
183  bool runOnFunction(Function &F) override {
184  const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
185  return expandReductions(F, TTI);
186  }
187 
188  void getAnalysisUsage(AnalysisUsage &AU) const override {
190  AU.setPreservesCFG();
191  }
192 };
193 }
194 
196 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
197  "Expand reduction intrinsics", false, false)
201 
203  return new ExpandReductions();
204 }
205 
208  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
209  if (!expandReductions(F, &TTI))
210  return PreservedAnalyses::all();
212  PA.preserveSet<CFGAnalyses>();
213  return PA;
214 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2320
llvm
Definition: AllocatorList.h:23
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
InstIterator.h
llvm::Function
Definition: Function.h:61
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::IRBuilder<>
llvm::getShuffleReduction
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None, ArrayRef< Value * > RedOps=None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:977
Module.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
llvm::RecurKind::SMin
@ SMin
Signed integer min implemented in terms of select(cmp()).
llvm::FastMathFlags::noNaNs
bool noNaNs() const
Definition: Operator.h:206
llvm::ExpandReductionsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: ExpandReductions.cpp:206
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RecurKind
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:38
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::getOrderedReduction
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None, ArrayRef< Value * > RedOps=None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:948
Intrinsics.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::FastMathFlags::allowReassoc
bool allowReassoc() const
Flag queries.
Definition: Operator.h:205
false
Definition: StackSlotColoring.cpp:142
LoopUtils.h
llvm::RecurKind::None
@ None
Not a recurrence.
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::RecurKind::UMin
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
Passes.h
llvm::initializeExpandReductionsPass
void initializeExpandReductionsPass(PassRegistry &)
reductions
expand reductions
Definition: ExpandReductions.cpp:199
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
expand
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2376
intrinsics
expand Expand reduction intrinsics
Definition: ExpandReductions.cpp:200
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
I
#define I(x, y, z)
Definition: MD5.cpp:59
IRBuilder.h
llvm::createExpandReductionsPass
FunctionPass * createExpandReductionsPass()
This pass expands the experimental reduction intrinsics into sequences of shuffles.
Definition: ExpandReductions.cpp:202
reduction
Straight line strength reduction
Definition: StraightLineStrengthReduce.cpp:267
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::RecurKind::UMax
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::RecurKind::FMax
@ FMax
FP max implemented in terms of select(cmp()).
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", "Expand reduction intrinsics", false, false) INITIALIZE_PASS_END(ExpandReductions
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Function.h
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:773
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
ExpandReductions.h
TargetTransformInfo.h
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1035
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::RecurKind::FMin
@ FMin
FP min implemented in terms of select(cmp()).
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
InitializePasses.h
llvm::RecurKind::SMax
@ SMax
Signed integer max implemented in terms of select(cmp()).
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38