LLVM  12.0.0git
ExpandReductions.cpp
Go to the documentation of this file.
1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the experimental intrinsics until just before codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/InitializePasses.h"
24 #include "llvm/Pass.h"
26 
27 using namespace llvm;
28 
29 namespace {
30 
31 unsigned getOpcode(Intrinsic::ID ID) {
32  switch (ID) {
33  case Intrinsic::experimental_vector_reduce_v2_fadd:
34  return Instruction::FAdd;
35  case Intrinsic::experimental_vector_reduce_v2_fmul:
36  return Instruction::FMul;
37  case Intrinsic::experimental_vector_reduce_add:
38  return Instruction::Add;
39  case Intrinsic::experimental_vector_reduce_mul:
40  return Instruction::Mul;
41  case Intrinsic::experimental_vector_reduce_and:
42  return Instruction::And;
43  case Intrinsic::experimental_vector_reduce_or:
44  return Instruction::Or;
45  case Intrinsic::experimental_vector_reduce_xor:
46  return Instruction::Xor;
47  case Intrinsic::experimental_vector_reduce_smax:
48  case Intrinsic::experimental_vector_reduce_smin:
49  case Intrinsic::experimental_vector_reduce_umax:
50  case Intrinsic::experimental_vector_reduce_umin:
51  return Instruction::ICmp;
52  case Intrinsic::experimental_vector_reduce_fmax:
53  case Intrinsic::experimental_vector_reduce_fmin:
54  return Instruction::FCmp;
55  default:
56  llvm_unreachable("Unexpected ID");
57  }
58 }
59 
// NOTE(review): this listing has lost lines here. The embedded numbering jumps
// from 59 to 61 and then skips 63, 65, 67, 69, 71, 73 and 75, so neither the
// function signature nor the statement under each case label is visible.
// Presumably this is the helper that maps the min/max reduction intrinsics to
// the `MRK` value handed to getShuffleReduction()/getOrderedReduction() in
// expandReductions() -- confirm against the upstream file before editing.
61  switch (ID) {
62  case Intrinsic::experimental_vector_reduce_smax:
64  case Intrinsic::experimental_vector_reduce_smin:
66  case Intrinsic::experimental_vector_reduce_umax:
68  case Intrinsic::experimental_vector_reduce_umin:
70  case Intrinsic::experimental_vector_reduce_fmax:
72  case Intrinsic::experimental_vector_reduce_fmin:
74  default:
76  }
77 }
78 
// Replaces reduction intrinsic calls in F with explicit IR built by
// getOrderedReduction()/getShuffleReduction().
//
// Only calls for which TTI->shouldExpandReduction() returns true are
// considered. Returns true if at least one call was replaced and erased.
//
// NOTE(review): `Worklist` and `MRK` are used below but their declarations are
// not visible in this listing (embedded line numbers 81, 111 and 115 are
// absent) -- presumably dropped during extraction; confirm against upstream.
79 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
80  bool Changed = false;
// First pass: only collect candidate calls. Nothing is modified while
// iterating over instructions(F); the rewriting (which erases instructions)
// happens in the second loop below.
82  for (auto &I : instructions(F)) {
83  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
84  switch (II->getIntrinsicID()) {
85  default: break;
86  case Intrinsic::experimental_vector_reduce_v2_fadd:
87  case Intrinsic::experimental_vector_reduce_v2_fmul:
88  case Intrinsic::experimental_vector_reduce_add:
89  case Intrinsic::experimental_vector_reduce_mul:
90  case Intrinsic::experimental_vector_reduce_and:
91  case Intrinsic::experimental_vector_reduce_or:
92  case Intrinsic::experimental_vector_reduce_xor:
93  case Intrinsic::experimental_vector_reduce_smax:
94  case Intrinsic::experimental_vector_reduce_smin:
95  case Intrinsic::experimental_vector_reduce_umax:
96  case Intrinsic::experimental_vector_reduce_umin:
97  case Intrinsic::experimental_vector_reduce_fmax:
98  case Intrinsic::experimental_vector_reduce_fmin:
// The target decides, per call, whether the intrinsic should be expanded.
99  if (TTI->shouldExpandReduction(II))
100  Worklist.push_back(II);
101 
102  break;
103  }
104  }
105  }
106 
// Second pass: build the replacement value for each collected call.
107  for (auto *II : Worklist) {
// Calls that are not FP math operators get default-constructed (empty) flags.
108  FastMathFlags FMF =
109  isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
110  Intrinsic::ID ID = II->getIntrinsicID();
112 
113  Value *Rdx = nullptr;
// The builder is constructed from II and given the call's fast-math flags
// before any replacement instructions are created.
114  IRBuilder<> Builder(II);
116  Builder.setFastMathFlags(FMF);
117  switch (ID) {
118  default: llvm_unreachable("Unexpected intrinsic!");
119  case Intrinsic::experimental_vector_reduce_v2_fadd:
120  case Intrinsic::experimental_vector_reduce_v2_fmul: {
121  // FMFs must be attached to the call, otherwise it's an ordered reduction
122  // and it can't be handled by generating a shuffle sequence.
// Operand 0 is the accumulator, operand 1 is the vector being reduced.
123  Value *Acc = II->getArgOperand(0);
124  Value *Vec = II->getArgOperand(1);
125  if (!FMF.allowReassoc())
126  Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
127  else {
// Calls whose element count is not a power of two are skipped: the `continue`
// leaves the call in place and does not set Changed for it.
128  if (!isPowerOf2_32(
129  cast<FixedVectorType>(Vec->getType())->getNumElements()))
130  continue;
131 
// Reduce the vector with shuffles first, then fold the accumulator into the
// result with one final binary operation of the same opcode.
132  Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
133  Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
134  Acc, Rdx, "bin.rdx");
135  }
136  break;
137  }
138  case Intrinsic::experimental_vector_reduce_add:
139  case Intrinsic::experimental_vector_reduce_mul:
140  case Intrinsic::experimental_vector_reduce_and:
141  case Intrinsic::experimental_vector_reduce_or:
142  case Intrinsic::experimental_vector_reduce_xor:
143  case Intrinsic::experimental_vector_reduce_smax:
144  case Intrinsic::experimental_vector_reduce_smin:
145  case Intrinsic::experimental_vector_reduce_umax:
146  case Intrinsic::experimental_vector_reduce_umin:
147  case Intrinsic::experimental_vector_reduce_fmax:
148  case Intrinsic::experimental_vector_reduce_fmin: {
// These forms take the vector as their only inspected operand (operand 0).
149  Value *Vec = II->getArgOperand(0);
// Same power-of-two restriction as above: other calls are left untouched.
150  if (!isPowerOf2_32(
151  cast<FixedVectorType>(Vec->getType())->getNumElements()))
152  continue;
153 
154  Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
155  break;
156  }
157  }
// Redirect all users to the expanded value, then delete the original call.
158  II->replaceAllUsesWith(Rdx);
159  II->eraseFromParent();
160  Changed = true;
161  }
162  return Changed;
163 }
164 
// FunctionPass wrapper around expandReductions().
//
// NOTE(review): embedded line numbers 169 and 178 are absent from this
// listing, so the constructor body and part of getAnalysisUsage() may be
// incomplete here -- confirm against upstream before editing.
165 class ExpandReductions : public FunctionPass {
166 public:
// Pass identifier handed to the FunctionPass base-class constructor.
167  static char ID;
168  ExpandReductions() : FunctionPass(ID) {
170  }
171 
// Fetches the TargetTransformInfo for F from TargetTransformInfoWrapperPass
// and forwards to expandReductions(); the returned bool is its result.
172  bool runOnFunction(Function &F) override {
173  const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
174  return expandReductions(F, TTI);
175  }
176 
// Declares to the pass manager that this pass preserves the CFG.
177  void getAnalysisUsage(AnalysisUsage &AU) const override {
179  AU.setPreservesCFG();
180  }
181 };
182 }
183 
// Pass registration macros for ExpandReductions, using the argument string
// "expand-reductions" and the description "Expand reduction intrinsics".
// NOTE(review): embedded line numbers 187-188 are absent from this listing, so
// the last line below is presumably the tail of a separate
// INITIALIZE_PASS_END(...) invocation whose opening was dropped -- confirm
// against upstream.
185 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
186  "Expand reduction intrinsics", false, false)
189  "Expand reduction intrinsics", false, false)
190 
// Body of a factory that allocates a new ExpandReductions pass object and
// returns the pointer to the caller.
// NOTE(review): the signature line (embedded number 191) is missing from this
// listing; presumably it is `FunctionPass *createExpandReductionsPass()` --
// confirm against upstream.
192  return new ExpandReductions();
193 }
194 
// Body of a pass entry point that obtains TargetIRAnalysis's result for F from
// the analysis manager and runs expandReductions() with it.
// NOTE(review): the signature line (embedded number 196) and the declaration
// of `PA` (embedded number 200) are missing from this listing; presumably this
// is `PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)` --
// confirm against upstream.
197  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
// Nothing was expanded, so every analysis is reported as preserved.
198  if (!expandReductions(F, &TTI))
199  return PreservedAnalyses::all();
// Something was rewritten: mark the CFG analyses set as preserved in PA.
201  PA.preserveSet<CFGAnalyses>();
202  return PA;
203 }
FunctionPass * createExpandReductionsPass()
This pass expands the experimental reduction intrinsics into sequences of shuffles.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Analysis pass providing the TargetTransformInfo.
expand Expand reduction intrinsics
F(f)
AnalysisUsage & addRequired()
void initializeExpandReductionsPass(PassRegistry &)
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:196
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind=RecurrenceDescriptor::MRK_Invalid, ArrayRef< Value *> RedOps=None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:912
static bool runOnFunction(Function &F, bool PostInlining)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:492
Straight line strength reduction
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:27
assume Assume Builder
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
expand reductions
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
Module.h This file contains the declarations for the Module class.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:115
bool shouldExpandReduction(const IntrinsicInst *II) const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:190
#define I(x, y, z)
Definition: MD5.cpp:59
static Value * getNumElements(BasicBlock *Preheader, Value *BTC)
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", "Expand reduction intrinsics", false, false) INITIALIZE_PASS_END(ExpandReductions
LLVM Value Representation.
Definition: Value.h:74
bool allowReassoc() const
Flag queries.
Definition: Operator.h:205
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind=RecurrenceDescriptor::MRK_Invalid, ArrayRef< Value *> RedOps=None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:881
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
inst_range instructions(Function *F)
Definition: InstIterator.h:133
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)