LLVM  10.0.0svn
ExpandReductions.cpp
Go to the documentation of this file.
1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the experimental intrinsics until just before codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 
30 unsigned getOpcode(Intrinsic::ID ID) {
31  switch (ID) {
32  case Intrinsic::experimental_vector_reduce_v2_fadd:
33  return Instruction::FAdd;
34  case Intrinsic::experimental_vector_reduce_v2_fmul:
35  return Instruction::FMul;
36  case Intrinsic::experimental_vector_reduce_add:
37  return Instruction::Add;
38  case Intrinsic::experimental_vector_reduce_mul:
39  return Instruction::Mul;
40  case Intrinsic::experimental_vector_reduce_and:
41  return Instruction::And;
42  case Intrinsic::experimental_vector_reduce_or:
43  return Instruction::Or;
44  case Intrinsic::experimental_vector_reduce_xor:
45  return Instruction::Xor;
46  case Intrinsic::experimental_vector_reduce_smax:
47  case Intrinsic::experimental_vector_reduce_smin:
48  case Intrinsic::experimental_vector_reduce_umax:
49  case Intrinsic::experimental_vector_reduce_umin:
50  return Instruction::ICmp;
51  case Intrinsic::experimental_vector_reduce_fmax:
52  case Intrinsic::experimental_vector_reduce_fmin:
53  return Instruction::FCmp;
54  default:
55  llvm_unreachable("Unexpected ID");
56  }
57 }
58 
60  switch (ID) {
61  case Intrinsic::experimental_vector_reduce_smax:
63  case Intrinsic::experimental_vector_reduce_smin:
65  case Intrinsic::experimental_vector_reduce_umax:
67  case Intrinsic::experimental_vector_reduce_umin:
69  case Intrinsic::experimental_vector_reduce_fmax:
71  case Intrinsic::experimental_vector_reduce_fmin:
73  default:
75  }
76 }
77 
78 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
79  bool Changed = false;
81  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
82  if (auto II = dyn_cast<IntrinsicInst>(&*I))
83  Worklist.push_back(II);
84 
85  for (auto *II : Worklist) {
86  if (!TTI->shouldExpandReduction(II))
87  continue;
88 
89  FastMathFlags FMF =
90  isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
91  Intrinsic::ID ID = II->getIntrinsicID();
93 
94  Value *Rdx = nullptr;
95  IRBuilder<> Builder(II);
96  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
97  Builder.setFastMathFlags(FMF);
98  switch (ID) {
99  case Intrinsic::experimental_vector_reduce_v2_fadd:
100  case Intrinsic::experimental_vector_reduce_v2_fmul: {
101  // FMFs must be attached to the call, otherwise it's an ordered reduction
102  // and it can't be handled by generating a shuffle sequence.
103  Value *Acc = II->getArgOperand(0);
104  Value *Vec = II->getArgOperand(1);
105  if (!FMF.allowReassoc())
106  Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
107  else {
108  Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
109  Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
110  Acc, Rdx, "bin.rdx");
111  }
112  } break;
113  case Intrinsic::experimental_vector_reduce_add:
114  case Intrinsic::experimental_vector_reduce_mul:
115  case Intrinsic::experimental_vector_reduce_and:
116  case Intrinsic::experimental_vector_reduce_or:
117  case Intrinsic::experimental_vector_reduce_xor:
118  case Intrinsic::experimental_vector_reduce_smax:
119  case Intrinsic::experimental_vector_reduce_smin:
120  case Intrinsic::experimental_vector_reduce_umax:
121  case Intrinsic::experimental_vector_reduce_umin:
122  case Intrinsic::experimental_vector_reduce_fmax:
123  case Intrinsic::experimental_vector_reduce_fmin: {
124  Value *Vec = II->getArgOperand(0);
125  Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
126  } break;
127  default:
128  continue;
129  }
130  II->replaceAllUsesWith(Rdx);
131  II->eraseFromParent();
132  Changed = true;
133  }
134  return Changed;
135 }
136 
137 class ExpandReductions : public FunctionPass {
138 public:
139  static char ID;
140  ExpandReductions() : FunctionPass(ID) {
142  }
143 
144  bool runOnFunction(Function &F) override {
145  const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
146  return expandReductions(F, TTI);
147  }
148 
149  void getAnalysisUsage(AnalysisUsage &AU) const override {
151  AU.setPreservesCFG();
152  }
153 };
154 }
155 
157 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
158  "Expand reduction intrinsics", false, false)
161  "Expand reduction intrinsics", false, false)
162 
164  return new ExpandReductions();
165 }
166 
169  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
170  if (!expandReductions(F, &TTI))
171  return PreservedAnalyses::all();
173  PA.preserveSet<CFGAnalyses>();
174  return PA;
175 }
FunctionPass * createExpandReductionsPass()
This pass expands the experimental reduction intrinsics into sequences of shuffles.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Analysis pass providing the TargetTransformInfo.
expand Expand reduction intrinsics
F(f)
Value * getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind=RecurrenceDescriptor::MRK_Invalid, ArrayRef< Value *> RedOps=None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:820
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void initializeExpandReductionsPass(PassRegistry &)
inst_iterator inst_begin(Function *F)
Definition: InstIterator.h:131
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:196
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
Value * getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind=RecurrenceDescriptor::MRK_Invalid, ArrayRef< Value *> RedOps=None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:789
static bool runOnFunction(Function &F, bool PostInlining)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Straight line strength reduction
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:27
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
expand reductions
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:114
bool shouldExpandReduction(const IntrinsicInst *II) const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:189
#define I(x, y, z)
Definition: MD5.cpp:58
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", "Expand reduction intrinsics", false, false) INITIALIZE_PASS_END(ExpandReductions
LLVM Value Representation.
Definition: Value.h:73
bool allowReassoc() const
Flag queries.
Definition: Operator.h:204
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
inst_iterator inst_end(Function *F)
Definition: InstIterator.h:132
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.