docs/doxygen/ExpandReductions_8cpp_source.html

//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass implements IR expansion for reduction intrinsics, allowing targets

// to enable the intrinsics until just before codegen.

//

//===----------------------------------------------------------------------===//


#include "llvm/CodeGen/ExpandReductions.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/CodeGen/Passes.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstIterator.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/InitializePasses.h"

#include "llvm/Pass.h"

#include "llvm/Transforms/Utils/LoopUtils.h"


using namespace llvm;


namespace {


/// Expands vector reduction intrinsics found in \p F into explicit IR.
///
/// Every call to one of the vector_reduce_* intrinsics listed below for which
/// TTI->shouldExpandReduction() returns true is queued. Each queued call is
/// then rewritten immediately in front of the original instruction, either as
/// a shuffle-based reduction or, for fadd/fmul without the reassoc flag, as an
/// ordered reduction. Calls hitting one of the bail-out conditions in the
/// individual cases are left in place.
///
/// \param F   Function whose instructions are scanned and rewritten.
/// \param TTI Target hooks deciding whether to expand and which shuffle
///            pattern to prefer; dereferenced unconditionally.
/// \returns true if at least one intrinsic call was replaced.
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  // Recognizes the reduction intrinsics handled by this function.
  auto IsHandledReduction = [](Intrinsic::ID IID) {
    switch (IID) {
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul:
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
      return true;
    default:
      return false;
    }
  };

  // Tests whether a fixed-width vector value has a power-of-two lane count.
  auto HasPow2NumElts = [](Value *V) {
    return isPowerOf2_32(
        cast<FixedVectorType>(V->getType())->getNumElements());
  };

  // Candidates are collected up front; the rewrite below erases instructions,
  // so it only runs once this scan has finished.
  SmallVector<IntrinsicInst *, 4> Pending;
  for (auto &Inst : instructions(F)) {
    auto *Call = dyn_cast<IntrinsicInst>(&Inst);
    if (!Call)
      continue;
    if (IsHandledReduction(Call->getIntrinsicID()) &&
        TTI->shouldExpandReduction(Call))
      Pending.push_back(Call);
  }

  bool Changed = false;
  for (IntrinsicInst *Call : Pending) {
    // Calls that are not FP math operators get an empty flag set.
    FastMathFlags Flags{};
    if (isa<FPMathOperator>(Call))
      Flags = Call->getFastMathFlags();

    const Intrinsic::ID ID = Call->getIntrinsicID();
    const RecurKind MinMaxKind = getMinMaxReductionRecurKind(ID);
    const TargetTransformInfo::ReductionShuffle ShuffleKind =
        TTI->getPreferredExpandedReductionShuffle(Call);

    // New instructions are inserted right before the intrinsic call and
    // inherit its fast-math flags.
    Value *Result = nullptr;
    IRBuilder<> Builder(Call);
    IRBuilder<>::FastMathFlagGuard FlagGuard(Builder);
    Builder.setFastMathFlags(Flags);

    switch (ID) {
    default:
      llvm_unreachable("Unexpected intrinsic!");

    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      Value *Start = Call->getArgOperand(0);
      Value *Input = Call->getArgOperand(1);
      const unsigned Opcode = getArithmeticReductionInstruction(ID);

      // Without "reassoc" on the call this is an ordered reduction, which a
      // shuffle sequence cannot implement; use the ordered expansion.
      if (!Flags.allowReassoc()) {
        Result = getOrderedReduction(Builder, Start, Input, Opcode, MinMaxKind);
        break;
      }

      // Non-power-of-two vectors are left untouched.
      if (!HasPow2NumElts(Input))
        continue;

      // Reduce the vector, then combine with the accumulator operand.
      Value *Reduced =
          getShuffleReduction(Builder, Input, Opcode, ShuffleKind, MinMaxKind);
      Result = Builder.CreateBinOp(static_cast<Instruction::BinaryOps>(Opcode),
                                   Start, Reduced, "bin.rdx");
      break;
    }

    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or: {
      Value *Input = Call->getArgOperand(0);
      auto *VecTy = cast<FixedVectorType>(Input->getType());
      const unsigned Lanes = VecTy->getNumElements();
      if (!isPowerOf2_32(Lanes))
        continue;

      // Logical (i1) and/or reductions get a canonical form:
      //   or :  %val = bitcast <N x i1> to iN ; %res = icmp ne iN %val, 0
      //   and:  %val = bitcast <N x i1> to iN ; %res = icmp eq iN %val, -1
      if (VecTy->getElementType() == Builder.getInt1Ty()) {
        Value *Mask = Builder.CreateBitCast(Input, Builder.getIntNTy(Lanes));
        if (ID == Intrinsic::vector_reduce_and) {
          Result = Builder.CreateICmpEQ(
              Mask, ConstantInt::getAllOnesValue(Mask->getType()));
        } else {
          assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
          Result = Builder.CreateIsNotNull(Mask);
        }
        break;
      }

      // Wider element types take the generic shuffle expansion.
      const unsigned Opcode = getArithmeticReductionInstruction(ID);
      Result =
          getShuffleReduction(Builder, Input, Opcode, ShuffleKind, MinMaxKind);
      break;
    }

    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      Value *Input = Call->getArgOperand(0);
      if (!HasPow2NumElts(Input))
        continue;
      const unsigned Opcode = getArithmeticReductionInstruction(ID);
      Result =
          getShuffleReduction(Builder, Input, Opcode, ShuffleKind, MinMaxKind);
      break;
    }

    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin: {
      // The shuffle expansion needs "nnan"; "nsz" is implied by the semantics
      // of the reduction.
      Value *Input = Call->getArgOperand(0);
      const bool CanShuffle = HasPow2NumElts(Input) && Flags.noNaNs();
      if (!CanShuffle)
        continue;
      const unsigned Opcode = getArithmeticReductionInstruction(ID);
      Result =
          getShuffleReduction(Builder, Input, Opcode, ShuffleKind, MinMaxKind);
      break;
    }
    }

    // Swap the expansion in for the intrinsic call and drop the call.
    Call->replaceAllUsesWith(Result);
    Call->eraseFromParent();
    Changed = true;
  }

  return Changed;
}


class ExpandReductions : public FunctionPass {

public:

  static char ID;

  ExpandReductions() : FunctionPass(ID) {

    initializeExpandReductionsPass(*PassRegistry::getPassRegistry());

  }


  bool runOnFunction(Function &F) override {

    const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

    return expandReductions(F, TTI);

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addRequired<TargetTransformInfoWrapperPass>();

    AU.setPreservesCFG();

  }

};

}


// Out-of-line definition of the static pass identifier that the
// ExpandReductions constructor hands to FunctionPass.
char ExpandReductions::ID;

// Pass registration. The macro arguments are (passName, arg, name, cfg,
// analysis): arg is "expand-reductions", name is "Expand reduction
// intrinsics", and both cfg and analysis are false.
// TargetTransformInfoWrapperPass is listed as a dependency.
// NOTE(review): these macros presumably generate
// initializeExpandReductionsPass(), which the constructor calls -- confirm
// against PassSupport.h.
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",

                      "Expand reduction intrinsics", false, false)

INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)

INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",

                    "Expand reduction intrinsics", false, false)


/// Allocates a new ExpandReductions pass instance and returns it as a
/// FunctionPass pointer.
FunctionPass *llvm::createExpandReductionsPass() {
  FunctionPass *NewPass = new ExpandReductions();
  return NewPass;
}


/// Runs the reduction expansion on \p F using the TargetIRAnalysis result
/// obtained from \p AM.
///
/// \returns PreservedAnalyses::all() when nothing was rewritten; otherwise a
/// set that preserves only the CFGAnalyses group.
PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  const bool MadeChange = expandReductions(F, &TTI);

  if (MadeChange) {
    // Report that the rewrite leaves the CFG-only analyses valid.
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}

instructions
Expand Atomic instructions
Definition: AtomicExpandPass.cpp:172

Passes.h

reductions
expand reductions
Definition: ExpandReductions.cpp:179

intrinsics
expand Expand reduction intrinsics
Definition: ExpandReductions.cpp:180

ExpandReductions.h

expand
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:21

IRBuilder.h

IntrinsicInst.h

InitializePasses.h

InstIterator.h

Intrinsics.h

LoopUtils.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

II
uint64_t IntrinsicInst * II
Definition: NVVMIntrRange.cpp:51

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

Pass.h

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

reduction
Straight line strength reduction
Definition: StraightLineStrengthReduce.cpp:266

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72

llvm::ExpandReductionsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: ExpandReductions.cpp:186

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20

llvm::FastMathFlags::allowReassoc
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65

llvm::FastMathFlags::noNaNs
bool noNaNs() const
Definition: FMF.h:66

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310

llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.

llvm::Function
Definition: Function.h:63

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705

llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:972

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24

llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:413

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:3189

llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:3245

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:212

llvm::TargetTransformInfo::ReductionShuffle
ReductionShuffle
Definition: TargetTransformInfo.h:1783

llvm::TargetTransformInfo::getPreferredExpandedReductionShuffle
ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1403

llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1398

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

unsigned

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

false
Definition: StackSlotColoring.cpp:193

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::getArithmeticReductionInstruction
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Definition: LoopUtils.cpp:960

llvm::initializeExpandReductionsPass
void initializeExpandReductionsPass(PassRegistry &)

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293

llvm::getShuffleReduction
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:1118

llvm::RecurKind
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33

llvm::createExpandReductionsPass
FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
Definition: ExpandReductions.cpp:182

llvm::getMinMaxReductionRecurKind
RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1035

llvm::getOrderedReduction
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:1093