doxygen/HexagonGenWideningVecFloatInstr_8cpp_source.html

//===------------------- HexagonGenWideningVecFloatInstr.cpp --------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// Replace widening vector float operations with hexagon intrinsics.

//

//===----------------------------------------------------------------------===//

//

// Brief overview of working of GenWideningVecFloatInstr pass.

// This pass is a replica of the already existing pass (which replaces widening

// vector integer operations with their respective intrinsics). In this pass we

// generate Hexagon intrinsics for widening vector float instructions.

//

// Example1(64 vector-width widening):

// %wide.load = load <64 x half>, <64 x half>* %0, align 2

// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2

// %1 = fpext <64 x half> %wide.load to <64 x float>

// %3 = fpext <64 x half> %wide.load53 to <64 x float>

// %4 = fmul <64 x float> %1, %3

//

// If we run this pass on the above example, it will first find the fmul

// instruction, and then check whether the operands of the fmul instruction

// (%1 and %3) belong to one of these categories: [%1 ->fpext, %3 ->fpext],

// [%1 ->fpext, %3 ->constant_vector] or [%1 ->constant_vector, %3 ->fpext].

// If it sees such a pattern, the pass replaces it with the appropriate

// Hexagon intrinsics.

//

// After replacement:

// %wide.load = load <64 x half>, <64 x half>* %0, align 2

// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2

// %3 = bitcast <64 x half> %wide.load to <32 x i32>

// %4 = bitcast <64 x half> %wide.load53 to <32 x i32>

// %5 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %4)

// %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> ShuffMask1

// %7 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %6)

// %8 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %6)

// %9 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %7)

// %10 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %8)

// %11 = bitcast <32 x i32> %9 to <32 x float>

// %12 = bitcast <32 x i32> %10 to <32 x float>

// %13 = shufflevector <32 x float> %12, <32 x float> %11, <64 x i32> ShuffMask2

//

//

//

// Example2(128 vector-width widening):

// %0 = bitcast half* %a to <128 x half>*

// %wide.load = load <128 x half>, <128 x half>* %0, align 2

// %1 = fpext <128 x half> %wide.load to <128 x float>

// %2 = bitcast half* %b to <128 x half>*

// %wide.load2 = load <128 x half>, <128 x half>* %2, align 2

// %3 = fpext <128 x half> %wide.load2 to <128 x float>

// %4 = fmul <128 x float> %1, %3

//

// After replacement:

// %0 = bitcast half* %a to <128 x half>*

// %wide.load = load <128 x half>, <128 x half>* %0, align 2

// %1 = bitcast half* %b to <128 x half>*

// %wide.load2 = load <128 x half>, <128 x half>* %1, align 2

// %2 = bitcast <128 x half> %wide.load to <64 x i32>

// %3 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %2)

// %4 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %2)

// %5 = bitcast <128 x half> %wide.load2 to <64 x i32>

// %6 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %5)

// %7 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %5)

// %8 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %6)

// %9 = shufflevector <64 x i32> %8, <64 x i32> poison, <64 x i32> Mask1

// %10 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %9)

// %11 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %9)

// %12 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %10)

// %13 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %11)

// %14 = bitcast <32 x i32> %12 to <32 x float>

// %15 = bitcast <32 x i32> %13 to <32 x float>

// %16 = shufflevector <32 x float> %15, <32 x float> %14, <64 x i32> Mask2

// %17 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%4, %7)

// %18 = shufflevector <64 x i32> %17, <64 x i32> poison, <64 x i32> Mask1

// %19 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %18)

// %20 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %18)

// %21 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %19)

// %22 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %20)

// %23 = bitcast <32 x i32> %21 to <32 x float>

// %24 = bitcast <32 x i32> %22 to <32 x float>

// %25 = shufflevector <32 x float> %24, <32 x float> %23, <64 x i32> Mask2

// %26 = shufflevector <64 x float> %25, <64 x float> %16, <128 x i32> Mask3

//

//

//===----------------------------------------------------------------------===//

#include "HexagonTargetMachine.h"

#include "llvm/ADT/APInt.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicsHexagon.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/InitializePasses.h"

#include "llvm/Pass.h"

#include <algorithm>

#include <utility>


using namespace llvm;


// Forward declarations for this pass: the registration hook that both
// constructors of the pass call, and the factory function that is defined at
// the end of this file.
namespace llvm {

void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &);

FunctionPass *

createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);

} // end namespace llvm


namespace {


/// Function pass that looks for vector fmul instructions whose operands are
/// extended from half precision (or are suitable constants) and replaces them
/// with Hexagon HVX widening-multiply intrinsics.
class HexagonGenWideningVecFloatInstr : public FunctionPass {
public:
  static char ID;

  /// Default constructor. Leaves the target machine pointer null.
  HexagonGenWideningVecFloatInstr() : FunctionPass(ID) {
    initializeHexagonGenWideningVecFloatInstrPass(
        *PassRegistry::getPassRegistry());
  }

  /// Constructs the pass for the given target machine. Marked explicit so a
  /// target machine pointer never converts to a pass implicitly.
  explicit HexagonGenWideningVecFloatInstr(const HexagonTargetMachine *TM)
      : FunctionPass(ID), TM(TM) {
    initializeHexagonGenWideningVecFloatInstrPass(
        *PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "Hexagon generate widening vector float instructions";
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    FunctionPass::getAnalysisUsage(AU);
  }

private:
  // Per-function state, (re)assigned at the start of runOnFunction.
  Module *M = nullptr;
  const HexagonTargetMachine *TM = nullptr;
  const HexagonSubtarget *HST = nullptr;
  // Hardware vector length in bits; zero until runOnFunction sets it.
  unsigned HwVLen = 0;
  // Number of half (2-byte) elements in one full vector; zero until
  // runOnFunction sets it.
  unsigned NumHalfEltsInFullVec = 0;

  /// Describes one operand of a candidate fmul. All fields have defined
  /// defaults so a default-constructed OPInfo never holds garbage.
  struct OPInfo {
    Value *OP = nullptr;      // The operand as it appears on the instruction.
    Value *ExtInOP = nullptr; // The value feeding the extension, or a constant.
    unsigned ExtInSize = 0;   // Element size of ExtInOP in bits.
  };

  bool visitBlock(BasicBlock *B);
  bool processInstruction(Instruction *Inst);
  bool replaceWithIntrinsic(Instruction *Inst, OPInfo &OP1Info,
                            OPInfo &OP2Info);

  bool getOperandInfo(Value *V, OPInfo &OPI);
  bool isExtendedConstant(Constant *C);
  unsigned getElementSizeInBits(Value *V);
  Type *getElementTy(unsigned size, IRBuilder<> &IRB);

  Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
                              unsigned NewEltsize, unsigned NumElts);

  std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst);

  Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
                         Value *NewOP2, FixedVectorType *ResType,
                         unsigned NumElts, bool BitCastOp);
};


} // end anonymous namespace


// Storage for the static pass ID member declared in the class above.
char HexagonGenWideningVecFloatInstr::ID = 0;


// Pass registration. The macro arguments are (passName, arg, name, cfg,
// analysis): the pass is registered under the argument string
// "widening-vec-float" with cfg = false and analysis = false.
// NOTE(review): DominatorTreeWrapperPass is listed as a dependency below, but
// getAnalysisUsage() in this file does not request it -- confirm whether the
// dependency is actually needed.
INITIALIZE_PASS_BEGIN(HexagonGenWideningVecFloatInstr, "widening-vec-float",

                      "Hexagon generate "

                      "widening vector float instructions",

                      false, false)

INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)

INITIALIZE_PASS_END(HexagonGenWideningVecFloatInstr, "widening-vec-float",

                    "Hexagon generate "

                    "widening vector float instructions",

                    false, false)


/// Returns true if the constant \p C consists solely of floating-point
/// elements that can be converted to IEEE half precision.
///
/// An element is accepted when converting it to IEEE half (round to nearest,
/// ties to even) reports a status of exactly opOK or opInexact. A splat
/// constant is decided by its splat value alone. Any element that is missing
/// or is not a ConstantFP makes the whole constant unacceptable.
bool HexagonGenWideningVecFloatInstr::isExtendedConstant(Constant *C) {
  // Shared acceptance test for a single floating-point element.
  auto ConvertsToHalf = [](const ConstantFP *CFP) {
    bool LosesInfo;
    APFloat APF = CFP->getValueAPF();
    APFloat::opStatus Status = APF.convert(
        APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return Status == APFloat::opOK || Status == APFloat::opInexact;
  };

  if (Constant *SplatV = C->getSplatValue()) {
    auto *CFP = dyn_cast<ConstantFP>(SplatV);
    return CFP && ConvertsToHalf(CFP);
  }

  // Only fixed-width vector constants can be inspected element by element.
  auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
  if (!VecTy)
    return false;

  for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
    // getAggregateElement() may return null (for example for a constant
    // expression), so a null-tolerant cast is required here.
    auto *CFP = dyn_cast_if_present<ConstantFP>(C->getAggregateElement(I));
    if (!CFP || !ConvertsToHalf(CFP))
      return false;
  }
  return true;
}


/// Returns the element size in bits associated with \p V.
///
/// For a non-constant value this is the bit width of its scalar type (the
/// element type for vectors, the type itself otherwise). For a constant the
/// result is half of that width. Scalar constants are handled as well; they
/// no longer trip a vector-type cast.
unsigned HexagonGenWideningVecFloatInstr::getElementSizeInBits(Value *V) {
  const unsigned EltSize = V->getType()->getScalarSizeInBits();
  // isa<> is the right query here: only the kind of V matters, not the
  // casted pointer.
  return isa<Constant>(V) ? EltSize / 2 : EltSize;
}


/// Fills \p OPI for the operand \p V and reports whether \p V is usable.
///
/// \p V is usable when it is an fpext, a zero-mask shuffle of an fpext
/// inserted at lane zero of a poison vector, or a constant accepted by
/// isExtendedConstant(). A bfloat source is rejected. OPI.OP is always set;
/// OPI.ExtInOP and OPI.ExtInSize are only set when true is returned.
bool HexagonGenWideningVecFloatInstr::getOperandInfo(Value *V, OPInfo &OPI) {
  using namespace PatternMatch;
  OPI.OP = V;

  Value *Src = nullptr;
  if (match(V, (m_FPExt(m_Value(Src)))) ||
      match(V,
            m_Shuffle(m_InsertElt(m_Poison(), m_FPExt(m_Value(Src)), m_Zero()),
                      m_Poison(), m_ZeroMask()))) {
    // Do not confuse bf16 with ieee-fp16. The scalar type is the element type
    // for the vector fpext form and the type itself for the insert-element
    // form.
    if (Src->getType()->getScalarType()->isBFloatTy())
      return false;

    OPI.ExtInOP = Src;
    OPI.ExtInSize = getElementSizeInBits(Src);
    return true;
  }

  auto *ConstOp = dyn_cast<Constant>(V);
  if (!ConstOp || !isExtendedConstant(ConstOp))
    return false;

  OPI.ExtInOP = ConstOp;
  OPI.ExtInSize = getElementSizeInBits(ConstOp);
  return true;
}


/// Maps an element size in bits to a floating-point type: 16 gives half and
/// 32 gives float. Any other size is treated as unreachable.
Type *HexagonGenWideningVecFloatInstr::getElementTy(unsigned size,
                                                    IRBuilder<> &IRB) {
  if (size == 16)
    return IRB.getHalfTy();
  if (size == 32)
    return IRB.getFloatTy();
  llvm_unreachable("Unhandled Element size");
}


/// Produces the operand value to feed into the widening intrinsic.
///
/// A constant is truncated with an fptrunc to a vector of \p NumElts elements
/// of \p NewExtSize bits. A non-constant vector is returned unchanged when its
/// element size already equals \p NewExtSize. Every other case yields nullptr.
Value *HexagonGenWideningVecFloatInstr::adjustExtensionForOp(
    OPInfo &OPI, IRBuilder<> &IRB, unsigned NewExtSize, unsigned NumElts) {
  Value *Src = OPI.ExtInOP;
  unsigned EltSize = getElementSizeInBits(Src);
  assert(NewExtSize >= EltSize);

  // The target type is computed up front for every input, as before.
  FixedVectorType *TargetVecTy =
      FixedVectorType::get(getElementTy(NewExtSize, IRB), NumElts);

  if (isa<Constant>(Src))
    return IRB.CreateFPTrunc(Src, TargetVecTy);

  const bool Reusable = Src->getType()->isVectorTy() && NewExtSize == EltSize;
  return Reusable ? Src : nullptr;
}


/// Splits \p OP with the V6_hi_128B and V6_lo_128B intrinsics and returns the
/// pair (hi, lo). When \p Inst produces 128 elements, \p OP is first bitcast
/// to <64 x i32>. The new instructions are created with an IRBuilder
/// constructed on \p Inst.
std::pair<Value *, Value *>
HexagonGenWideningVecFloatInstr::opSplit(Value *OP, Instruction *Inst) {
  const unsigned InstNumElts =
      cast<FixedVectorType>(Inst->getType())->getNumElements();
  IRBuilder<> Builder(Inst);

  Function *HiDecl =
      Intrinsic::getOrInsertDeclaration(M, Intrinsic::hexagon_V6_hi_128B);
  Function *LoDecl =
      Intrinsic::getOrInsertDeclaration(M, Intrinsic::hexagon_V6_lo_128B);

  if (InstNumElts == 128)
    OP = Builder.CreateBitCast(
        OP, FixedVectorType::get(Builder.getInt32Ty(), 64));

  Value *HiPart = Builder.CreateCall(HiDecl, {OP});
  Value *LoPart = Builder.CreateCall(LoDecl, {OP});
  return {HiPart, LoPart};
}


// Emits the call to the intrinsic IntId on NewOP1/NewOP2 together with the
// conversion of its result, using an IRBuilder constructed on Inst.
//
// Steps performed:
//  - If NumElts is half of NumHalfEltsInFullVec, both operands are widened to
//    NumHalfEltsInFullVec elements by shuffling against a poison vector.
//  - If BitCastOp is set, both operands are bitcast to <32 x i32>.
//  - The intrinsic result is shuffled with an interleaving mask, split with
//    opSplit(), and each part is passed through V6_vconv_sf_qf32_128B and
//    bitcast to <32 x float>.
//
// Returns the converted first (hi) part alone when ResType has
// NumHalfEltsInFullVec / 2 elements; otherwise returns a shuffle of the
// (lo, hi) parts with the mask [0, NumElts).
Value *HexagonGenWideningVecFloatInstr::createIntrinsic(

    Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,

    FixedVectorType *ResType, unsigned NumElts, bool BitCastOp) {


  IRBuilder<> IRB(Inst);

  Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId);

  Function *ConvF = Intrinsic::getOrInsertDeclaration(

      M, Intrinsic::hexagon_V6_vconv_sf_qf32_128B);

  // Operand type used for the bitcast below, and the float type of each
  // converted part.
  auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 32);

  auto *RType = FixedVectorType::get(IRB.getFloatTy(), 32);


  // Make sure inputs to vmpy intrinsic are full vectors

  if (NumElts == NumHalfEltsInFullVec / 2) {

    // Identity mask over NumHalfEltsInFullVec lanes; the second shuffle
    // operand is poison.
    SmallVector<Constant *, 16> ConcatMask1;

    for (unsigned i = 0; i < NumHalfEltsInFullVec; ++i)

      ConcatMask1.push_back(IRB.getInt32(i));

    NewOP1 =

        IRB.CreateShuffleVector(NewOP1, PoisonValue::get(NewOP1->getType()),

                                ConstantVector::get(ConcatMask1));

    NewOP2 =

        IRB.CreateShuffleVector(NewOP2, PoisonValue::get(NewOP2->getType()),

                                ConstantVector::get(ConcatMask1));

  }


  if (BitCastOp) {

    NewOP1 = IRB.CreateBitCast(NewOP1, InType);

    NewOP2 = IRB.CreateBitCast(NewOP2, InType);

  }


  Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2});

  // Interleave the output elements to ensure correct order in Hi and Lo vectors

  // Shuffled Mask: [0, 32, 1, 33, ..., 31, 63]

  // Hi: [0, 1, ..., 31] and Lo: [32, 33, ..., 63]

  SmallVector<Constant *, 16> Mask;

  unsigned HalfVecPoint = NumHalfEltsInFullVec / 2;

  for (unsigned i = 0; i < HalfVecPoint; ++i) {

    Mask.push_back(IRB.getInt32(i));

    Mask.push_back(IRB.getInt32(HalfVecPoint + i));

  }

  NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(NewIn->getType()),

                                  ConstantVector::get(Mask));


  // SplitOP.first is the hi part, SplitOP.second the lo part.
  std::pair<Value *, Value *> SplitOP = opSplit(NewIn, Inst);

  Value *ConvHi = IRB.CreateCall(ConvF, {SplitOP.first});

  ConvHi = IRB.CreateBitCast(ConvHi, RType);


  // For a half-width result only the first converted part is returned.
  if (ResType->getNumElements() == NumHalfEltsInFullVec / 2) {

    return ConvHi;

  }


  Value *ConvLo = IRB.CreateCall(ConvF, {SplitOP.second});

  ConvLo = IRB.CreateBitCast(ConvLo, RType);


  SmallVector<Constant *, 16> ShuffleMask;

  for (unsigned i = 0; i < NumElts; ++i)

    ShuffleMask.push_back(IRB.getInt32(i));

  // Concat Hi and Lo.

  NewIn =

      IRB.CreateShuffleVector(ConvLo, ConvHi, ConstantVector::get(ShuffleMask));

  return NewIn;

}


/// Replaces all uses of \p Inst with a sequence built around the
/// V6_vmpy_qf32_hf_128B intrinsic, using the operands described by
/// \p OP1Info and \p OP2Info.
///
/// Returns true if the uses of \p Inst were replaced. Returns false, without
/// replacing anything, when the result element size of \p Inst is not twice
/// the operand element size, when the operand element size exceeds 16 bits,
/// when the result length is larger than but not a multiple of the hardware
/// vector length, when an operand cannot be adjusted, or when the element
/// count is not supported. \p Inst itself is not erased.
bool HexagonGenWideningVecFloatInstr::replaceWithIntrinsic(Instruction *Inst,
                                                           OPInfo &OP1Info,
                                                           OPInfo &OP2Info) {
  auto *InstVecTy = cast<FixedVectorType>(Inst->getType());
  unsigned NumElts = InstVecTy->getNumElements();
  unsigned InstEltSize = InstVecTy->getElementType()->getPrimitiveSizeInBits();

  unsigned MaxEltSize = OP1Info.ExtInSize;
  unsigned NewOpEltSize = MaxEltSize;
  unsigned NewResEltSize = 2 * MaxEltSize;

  // The replacement value has NewResEltSize-bit elements. If Inst produces a
  // different element size (for example an fmul on double whose operands are
  // extended from half), replaceAllUsesWith() would be handed a value of a
  // different type, so bail out before emitting anything.
  if (InstEltSize != NewResEltSize)
    return false;

  unsigned ResVLen = NewResEltSize * NumElts;
  if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0))
    return false;

  Intrinsic::ID IntId = Intrinsic::hexagon_V6_vmpy_qf32_hf_128B;
  IRBuilder<> IRB(Inst);
  Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts);
  Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts);

  if (NewOP1 == nullptr || NewOP2 == nullptr)
    return false;

  if (ResVLen > 2 * HwVLen) {
    // The code written in this if block generates the widening code when
    // vector-width is 128:
    //
    // Step 1: Bitcast <128 x half> type to <64 x i32>
    // %wide.load = load <128 x half>, <128 x half>* %0 is bitcasted to,
    // bitcast <128 x half> %wide.load to <64 x i32>
    //
    // Step 2: Generate Hi and Lo vectors
    // call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %4)
    // call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %4)
    //
    // Perform above 2 steps for both the operands of fmul instruction
    //
    // Step 3: Generate vmpy_qf32_hf multiply instruction to multiply two Hi
    // vectors from both operands.
    // call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%5, %8)
    //
    // Step 4: Convert the resultant 'qf32' output to 'sf' format
    // %11 = shufflevector <64 x i32> %10, <64 x i32> poison, <64 x i32> Mask1
    // %12 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %11)
    // %13 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %11)
    // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %12)
    // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %13)
    //
    // Repeat steps 3 and 4 for multiplication and conversion of Lo vectors.
    // Finally merge the output values in correct sequence using shuffle
    // vectors.

    assert(ResVLen == 4 * HwVLen);
    // Split the operands
    unsigned HalfElts = NumElts / 2;
    std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst);
    std::pair<Value *, Value *> SplitOP2 = opSplit(NewOP2, Inst);
    // Only the element count of this type is consulted by createIntrinsic().
    auto *castResType = FixedVectorType::get(IRB.getInt32Ty(), HalfElts);
    Value *NewInHi =
        createIntrinsic(IntId, Inst, SplitOP1.first, SplitOP2.first,
                        castResType, HalfElts, false);
    Value *NewInLo =
        createIntrinsic(IntId, Inst, SplitOP1.second, SplitOP2.second,
                        castResType, HalfElts, false);

    SmallVector<Constant *, 8> ShuffleMask;
    for (unsigned i = 0; i < NumElts; ++i)
      ShuffleMask.push_back(IRB.getInt32(i));
    // Concat Hi and Lo.
    Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi,
                                           ConstantVector::get(ShuffleMask));

    Inst->replaceAllUsesWith(NewIn);
    return true;
  }

  auto *ResType =
      FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);

  // The following widening code can only be generated in cases where
  // input vectors are 64xhalf/32xhalf and the results are 64xfloat/32xfloat
  // respectively. ResType is built with NumElts elements, so checking NumElts
  // covers both the input and the result.
  if (NumElts != NumHalfEltsInFullVec && NumElts != NumHalfEltsInFullVec / 2)
    return false;

  Value *NewIn =
      createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true);

  Inst->replaceAllUsesWith(NewIn);
  return true;
}


/// Examines \p Inst and, when it is a supported widening multiply pattern,
/// replaces its uses with widening vector intrinsics.
///
/// Two patterns are recognised:
///  - fpext(fmul(op1, op2)) where the fmul operates on half vectors, and
///  - fmul(op1, op2) where both operands satisfy getOperandInfo().
/// In both cases the operand element size must be 16 bits.
///
/// Returns true if the uses of \p Inst were replaced.
bool HexagonGenWideningVecFloatInstr::processInstruction(Instruction *Inst) {
  // dyn_cast rather than isVectorTy() + cast: a vector type that is not a
  // FixedVectorType is rejected here instead of tripping the cast.
  auto *InstTy = dyn_cast<FixedVectorType>(Inst->getType());
  if (!InstTy || InstTy->getNumElements() > 128)
    return false;
  unsigned InstLen = InstTy->getPrimitiveSizeInBits();
  if (!HST->isTypeForHVX(InstTy) && InstLen != 4 * HwVLen)
    return false;
  if (InstLen < HwVLen)
    return false;

  using namespace PatternMatch;

  Value *OP1 = nullptr, *OP2 = nullptr;
  // Value-initialise so that no field is ever read while indeterminate.
  OPInfo OP1Info{}, OP2Info{};

  // Handle the case when Inst = fpext(fmul<64xhalf>(op1, op2)). The Inst can
  // be replaced with widening multiply.
  if (match(Inst, (m_FPExt((m_FMul(m_Value(OP1), m_Value(OP2))))))) {
    auto *Op1VTy = dyn_cast<VectorType>(OP1->getType());
    if (!Op1VTy || !Op1VTy->getElementType()->isHalfTy())
      return false;

    OP1Info.OP = OP1;
    OP1Info.ExtInOP = OP1;
    OP1Info.ExtInSize = getElementSizeInBits(OP1);
    OP2Info.OP = OP2;
    OP2Info.ExtInOP = OP2;
    OP2Info.ExtInSize = getElementSizeInBits(OP2);

    if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16 &&
        getElementSizeInBits(Inst) == 32)
      return replaceWithIntrinsic(Inst, OP1Info, OP2Info);
    // Otherwise fall through: Inst is an fpext, so the fmul match below fails
    // and false is returned.
  }

  if (!match(Inst, (m_FMul(m_Value(OP1), m_Value(OP2)))))
    return false;

  if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info))
    return false;

  if (!OP1Info.ExtInOP || !OP2Info.ExtInOP)
    return false;

  if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16)
    return replaceWithIntrinsic(Inst, OP1Info, OP2Info);

  return false;
}


/// Runs processInstruction() on every instruction of \p B and reports whether
/// any call returned true. Every instruction is visited; there is no early
/// exit after the first change.
bool HexagonGenWideningVecFloatInstr::visitBlock(BasicBlock *B) {
  bool AnyReplaced = false;
  for (Instruction &Cur : *B) {
    if (processInstruction(&Cur))
      AnyReplaced = true;
  }
  return AnyReplaced;
}


/// Pass entry point: caches the module, the subtarget and the vector-length
/// parameters for \p F, then visits every basic block.
///
/// Returns true if any block reported a change. Returns false without doing
/// any work when no target machine is available, when the function is to be
/// skipped, or when HVX 128-byte operations are not enabled.
bool HexagonGenWideningVecFloatInstr::runOnFunction(Function &F) {
  // The default constructor leaves TM null; without a target machine there
  // is no subtarget to query, so do nothing instead of dereferencing null.
  if (!TM)
    return false;

  M = F.getParent();
  HST = TM->getSubtargetImpl(F);

  // Return if useHVX128BOps is not set. It can be enabled for 64B mode
  // but will require some changes. For example, bitcast for intrinsics
  // assumes 128B mode.
  if (skipFunction(F) || !HST->useHVX128BOps())
    return false;

  const unsigned VecLength = HST->getVectorLength(); // Vector length in bytes
  HwVLen = VecLength * 8;                            // Vector length in bits
  // Number of half (2B) elements that fit into a full HVX vector
  NumHalfEltsInFullVec = VecLength / 2;

  bool Changed = false;
  for (BasicBlock &B : F)
    Changed |= visitBlock(&B);

  return Changed;
}


/// Factory for the pass: returns a newly allocated
/// HexagonGenWideningVecFloatInstr constructed with the address of \p TM.
FunctionPass *
llvm::createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &TM) {
  const HexagonTargetMachine *TMPtr = &TM;
  return new HexagonGenWideningVecFloatInstr(TMPtr);
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition EntryExitInstrumenter.cpp:103

HexagonTargetMachine.h

IRBuilder.h

BasicBlock.h

Function.h

Instruction.h

Type.h

Value.h

InitializePasses.h

Instructions.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

Module
Machine Check Debug Module
Definition MachineCheckDebugify.cpp:124

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

Pass.h

PatternMatch.h

getOperandInfo
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO)
Definition RISCVVLOptimizer.cpp:851

OP
#define OP(OPC)
Definition Instruction.h:46

llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344

llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294

llvm::APFloatBase::opStatus
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360

llvm::APFloatBase::opOK
@ opOK
Definition APFloat.h:361

llvm::APFloatBase::opInexact
@ opInexact
Definition APFloat.h:366

llvm::APFloat
Definition APFloat.h:940

llvm::APFloat::convert
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053

llvm::ConstantVector::get
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
Definition Constants.cpp:1443

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321

llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition DerivedTypes.h:637

llvm::FixedVectorType::get
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::HexagonSubtarget::getVectorLength
unsigned getVectorLength() const
Definition HexagonSubtarget.h:335

llvm::HexagonSubtarget::useHVX128BOps
bool useHVX128BOps() const
Definition HexagonSubtarget.h:285

llvm::HexagonSubtarget::isTypeForHVX
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Definition HexagonSubtarget.cpp:207

llvm::HexagonTargetMachine
Definition HexagonTargetMachine.h:24

llvm::HexagonTargetMachine::getSubtargetImpl
const HexagonSubtarget * getSubtargetImpl(const Function &F) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition HexagonTargetMachine.cpp:253

llvm::IRBuilderBase::CreateFPTrunc
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2165

llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562

llvm::IRBuilderBase::getHalfTy
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Definition IRBuilder.h:580

llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522

llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2207

llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2601

llvm::IRBuilderBase::getFloatTy
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition IRBuilder.h:590

llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511

llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition PassRegistry.h:38

llvm::PassRegistry::getPassRegistry
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition PassRegistry.cpp:23

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition Constants.cpp:1905

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:417

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273

llvm::Type::isBFloatTy
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:145

llvm::Type::getPrimitiveSizeInBits
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

R600_InstFlag::OP2
@ OP2
Definition R600Defines.h:39

R600_InstFlag::OP1
@ OP1
Definition R600Defines.h:38

false
Definition MachinePipeliner.cpp:244

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD::Constant
@ Constant
Definition ISDOpcodes.h:86

llvm::Intrinsic::getOrInsertDeclaration
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition Intrinsics.cpp:755

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::M68k::MemAddrModeKind::V
@ V
Definition M68kBaseInfo.h:63

llvm::PatternMatch::m_Poison
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition PatternMatch.h:173

llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition PatternMatch.h:178

llvm::PatternMatch::m_FMul
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
Definition PatternMatch.h:1280

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:49

llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition PatternMatch.h:2066

llvm::PatternMatch::m_FPExt
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
Definition PatternMatch.h:2358

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition PatternMatch.h:105

llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition PatternMatch.h:624

llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition PatternMatch.h:1984

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:408

llvm::numbers::e
constexpr double e
Definition STLForwardCompat.h:61

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1129

llvm::initializeHexagonGenWideningVecFloatInstrPass
void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &)

llvm::IRBuilder
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

llvm::createHexagonGenWideningVecFloatInstr
FunctionPass * createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &)
Definition HexagonGenWideningVecFloatInstr.cpp:563

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559