LLVM 23.0.0git
RISCVVLOptimizer.cpp
Go to the documentation of this file.
1//===-------------- RISCVVLOptimizer.cpp - VL Optimizer -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass reduces the VL where possible at the MI level, before VSETVLI
10// instructions are inserted.
11//
12// The purpose of this optimization is to make the VL argument, for instructions
13// that have a VL argument, as small as possible.
14//
15// This is split into a sparse dataflow analysis where we determine what VL is
16// demanded by each instruction first, and then afterwards try to reduce the VL
17// of each instruction if it demands less than its VL operand.
18//
19// The analysis is explained in more detail in the 2025 EuroLLVM Developers'
20// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL
21// Optimizer", which is available on YouTube at
22// https://www.youtube.com/watch?v=Mfb5fRSdJAc
23//
24// The slides for the talk are available at
25// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf
26//
27//===---------------------------------------------------------------------===//
28
29#include "RISCV.h"
30#include "RISCVSubtarget.h"
32#include "llvm/ADT/SetVector.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "riscv-vl-optimizer"
40#define PASS_NAME "RISC-V VL Optimizer"
41
42namespace {
43
44/// Wrapper around MachineOperand that defaults to immediate 0.
45struct DemandedVL {
47 DemandedVL() : VL(MachineOperand::CreateImm(0)) {}
48 DemandedVL(MachineOperand VL) : VL(VL) {}
49 static DemandedVL vlmax() {
51 }
52 bool operator!=(const DemandedVL &Other) const {
53 return !VL.isIdenticalTo(Other.VL);
54 }
55
56 DemandedVL max(const DemandedVL &X) const {
57 if (RISCV::isVLKnownLE(VL, X.VL))
58 return X;
59 if (RISCV::isVLKnownLE(X.VL, VL))
60 return *this;
61 return DemandedVL::vlmax();
62 }
63};
64
65class RISCVVLOptimizer : public MachineFunctionPass {
67 const MachineDominatorTree *MDT;
68 const TargetInstrInfo *TII;
69
70public:
71 static char ID;
72
73 RISCVVLOptimizer() : MachineFunctionPass(ID) {}
74
75 bool runOnMachineFunction(MachineFunction &MF) override;
76
77 void getAnalysisUsage(AnalysisUsage &AU) const override {
78 AU.setPreservesCFG();
81 }
82
83 StringRef getPassName() const override { return PASS_NAME; }
84
85private:
86 DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
87 /// Returns true if the users of \p MI have compatible EEWs and SEWs.
88 bool checkUsers(const MachineInstr &MI) const;
89 bool tryReduceVL(MachineInstr &MI, MachineOperand VL) const;
90 bool isSupportedInstr(const MachineInstr &MI) const;
91 bool isCandidate(const MachineInstr &MI) const;
92 void transfer(const MachineInstr &MI);
93
94 /// For a given instruction, records what elements of it are demanded by
95 /// downstream users.
98
99 /// \returns all vector virtual registers that \p MI uses.
100 auto virtual_vec_uses(const MachineInstr &MI) const {
101 return make_filter_range(MI.uses(), [this](const MachineOperand &MO) {
102 return MO.isReg() && MO.getReg().isVirtual() &&
103 RISCVRegisterInfo::isRVVRegClass(MRI->getRegClass(MO.getReg()));
104 });
105 }
106};
107
108/// Represents the EMUL and EEW of a MachineOperand.
109struct OperandInfo {
110 // Represent as 1,2,4,8, ... and fractional indicator. This is because
111 // EMUL can take on values that don't map to RISCVVType::VLMUL values exactly.
112 // For example, a mask operand can have an EMUL less than MF8.
113 // If nullopt, then EMUL isn't used (i.e. only a single scalar is read).
114 std::optional<std::pair<unsigned, bool>> EMUL;
115
116 unsigned Log2EEW;
117
118 OperandInfo(RISCVVType::VLMUL EMUL, unsigned Log2EEW)
119 : EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {}
120
121 OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
122 : EMUL(EMUL), Log2EEW(Log2EEW) {}
123
124 OperandInfo(unsigned Log2EEW) : Log2EEW(Log2EEW) {}
125
126 OperandInfo() = delete;
127
128 /// Return true if the EMUL and EEW produced by \p Def is compatible with the
129 /// EMUL and EEW used by \p User.
130 static bool areCompatible(const OperandInfo &Def, const OperandInfo &User) {
131 if (Def.Log2EEW != User.Log2EEW)
132 return false;
133 if (User.EMUL && Def.EMUL != User.EMUL)
134 return false;
135 return true;
136 }
137
138 void print(raw_ostream &OS) const {
139 if (EMUL) {
140 OS << "EMUL: m";
141 if (EMUL->second)
142 OS << "f";
143 OS << EMUL->first;
144 } else
145 OS << "EMUL: none\n";
146 OS << ", EEW: " << (1 << Log2EEW);
147 }
148};
149
150} // end anonymous namespace
151
152char RISCVVLOptimizer::ID = 0;
153INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
156
158 return new RISCVVLOptimizer();
159}
160
161[[maybe_unused]]
162static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
163 OI.print(OS);
164 return OS;
165}
166
167[[maybe_unused]]
169 const std::optional<OperandInfo> &OI) {
170 if (OI)
171 OI->print(OS);
172 else
173 OS << "nullopt";
174 return OS;
175}
176
177/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
178/// SEW are from the TSFlags of MI.
179static std::pair<unsigned, bool>
181 RISCVVType::VLMUL MIVLMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
182 auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL);
183 unsigned MILog2SEW =
184 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
185
186 // Mask instructions will have 0 as the SEW operand. But the LMUL of these
187 // instructions is calculated is as if the SEW operand was 3 (e8).
188 if (MILog2SEW == 0)
189 MILog2SEW = 3;
190
191 unsigned MISEW = 1 << MILog2SEW;
192
193 unsigned EEW = 1 << Log2EEW;
194 // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
195 // to put fraction in simplest form.
196 unsigned Num = EEW, Denom = MISEW;
197 int GCD = MILMULIsFractional ? std::gcd(Num, Denom * MILMUL)
198 : std::gcd(Num * MILMUL, Denom);
199 Num = MILMULIsFractional ? Num / GCD : Num * MILMUL / GCD;
200 Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD;
201 return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
202}
203
204/// Dest has EEW=SEW. Source EEW=SEW/Factor (i.e. F2 => EEW/2).
205/// SEW comes from TSFlags of MI.
206static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
207 const MachineInstr &MI,
208 const MachineOperand &MO) {
209 unsigned MILog2SEW =
210 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
211
212 if (MO.getOperandNo() == 0)
213 return MILog2SEW;
214
215 unsigned MISEW = 1 << MILog2SEW;
216 unsigned EEW = MISEW / Factor;
217 unsigned Log2EEW = Log2_32(EEW);
218
219 return Log2EEW;
220}
221
// Helper macros that expand to the case labels for every NF variant
// (SEG2..SEG8) of a segment load/store pseudo with the given prefix and
// element width. Note: the first expanded label deliberately omits the
// `case` keyword so the macros can be used as `case VSSEG_CASES(8):`.
#define VSEG_CASES(Prefix, EEW)                                                \
  RISCV::Prefix##SEG2E##EEW##_V:                                               \
  case RISCV::Prefix##SEG3E##EEW##_V:                                          \
  case RISCV::Prefix##SEG4E##EEW##_V:                                          \
  case RISCV::Prefix##SEG5E##EEW##_V:                                          \
  case RISCV::Prefix##SEG6E##EEW##_V:                                          \
  case RISCV::Prefix##SEG7E##EEW##_V:                                          \
  case RISCV::Prefix##SEG8E##EEW##_V
#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
234
235static std::optional<unsigned> getOperandLog2EEW(const MachineOperand &MO) {
236 const MachineInstr &MI = *MO.getParent();
237 const MCInstrDesc &Desc = MI.getDesc();
239 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
240 assert(RVV && "Could not find MI in PseudoTable");
241
242 // MI has a SEW associated with it. The RVV specification defines
243 // the EEW of each operand and definition in relation to MI.SEW.
244 unsigned MILog2SEW = MI.getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
245
246 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
247 const bool IsTied = RISCVII::isTiedPseudo(Desc.TSFlags);
248
249 bool IsMODef = MO.getOperandNo() == 0 ||
250 (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs());
251
252 // All mask operands have EEW=1
253 const MCOperandInfo &Info = Desc.operands()[MO.getOperandNo()];
254 if (Info.OperandType == MCOI::OPERAND_REGISTER &&
255 Info.RegClass == RISCV::VMV0RegClassID)
256 return 0;
257
258 // switch against BaseInstr to reduce number of cases that need to be
259 // considered.
260 switch (RVV->BaseInstr) {
261
262 // 6. Configuration-Setting Instructions
263 // Configuration setting instructions do not read or write vector registers
264 case RISCV::VSETIVLI:
265 case RISCV::VSETVL:
266 case RISCV::VSETVLI:
267 llvm_unreachable("Configuration setting instructions do not read or write "
268 "vector registers");
269
270 // Vector Loads and Stores
271 // Vector Unit-Stride Instructions
272 // Vector Strided Instructions
273 /// Dest EEW encoded in the instruction
274 case RISCV::VLM_V:
275 case RISCV::VSM_V:
276 return 0;
277 case RISCV::VLE8_V:
278 case RISCV::VSE8_V:
279 case RISCV::VLSE8_V:
280 case RISCV::VSSE8_V:
281 case VSSEG_CASES(8):
282 case VSSSEG_CASES(8):
283 return 3;
284 case RISCV::VLE16_V:
285 case RISCV::VSE16_V:
286 case RISCV::VLSE16_V:
287 case RISCV::VSSE16_V:
288 case VSSEG_CASES(16):
289 case VSSSEG_CASES(16):
290 return 4;
291 case RISCV::VLE32_V:
292 case RISCV::VSE32_V:
293 case RISCV::VLSE32_V:
294 case RISCV::VSSE32_V:
295 case VSSEG_CASES(32):
296 case VSSSEG_CASES(32):
297 return 5;
298 case RISCV::VLE64_V:
299 case RISCV::VSE64_V:
300 case RISCV::VLSE64_V:
301 case RISCV::VSSE64_V:
302 case VSSEG_CASES(64):
303 case VSSSEG_CASES(64):
304 return 6;
305
306 // Vector Indexed Instructions
307 // vs(o|u)xei<eew>.v
308 // Dest/Data (operand 0) EEW=SEW. Source EEW=<eew>.
309 case RISCV::VLUXEI8_V:
310 case RISCV::VLOXEI8_V:
311 case RISCV::VSUXEI8_V:
312 case RISCV::VSOXEI8_V:
313 case VSUXSEG_CASES(8):
314 case VSOXSEG_CASES(8): {
315 if (MO.getOperandNo() == 0)
316 return MILog2SEW;
317 return 3;
318 }
319 case RISCV::VLUXEI16_V:
320 case RISCV::VLOXEI16_V:
321 case RISCV::VSUXEI16_V:
322 case RISCV::VSOXEI16_V:
323 case VSUXSEG_CASES(16):
324 case VSOXSEG_CASES(16): {
325 if (MO.getOperandNo() == 0)
326 return MILog2SEW;
327 return 4;
328 }
329 case RISCV::VLUXEI32_V:
330 case RISCV::VLOXEI32_V:
331 case RISCV::VSUXEI32_V:
332 case RISCV::VSOXEI32_V:
333 case VSUXSEG_CASES(32):
334 case VSOXSEG_CASES(32): {
335 if (MO.getOperandNo() == 0)
336 return MILog2SEW;
337 return 5;
338 }
339 case RISCV::VLUXEI64_V:
340 case RISCV::VLOXEI64_V:
341 case RISCV::VSUXEI64_V:
342 case RISCV::VSOXEI64_V:
343 case VSUXSEG_CASES(64):
344 case VSOXSEG_CASES(64): {
345 if (MO.getOperandNo() == 0)
346 return MILog2SEW;
347 return 6;
348 }
349
350 // Vector Integer Arithmetic Instructions
351 // Vector Single-Width Integer Add and Subtract
352 case RISCV::VADD_VI:
353 case RISCV::VADD_VV:
354 case RISCV::VADD_VX:
355 case RISCV::VSUB_VV:
356 case RISCV::VSUB_VX:
357 case RISCV::VRSUB_VI:
358 case RISCV::VRSUB_VX:
359 // Vector Bitwise Logical Instructions
360 // Vector Single-Width Shift Instructions
361 // EEW=SEW.
362 case RISCV::VAND_VI:
363 case RISCV::VAND_VV:
364 case RISCV::VAND_VX:
365 case RISCV::VOR_VI:
366 case RISCV::VOR_VV:
367 case RISCV::VOR_VX:
368 case RISCV::VXOR_VI:
369 case RISCV::VXOR_VV:
370 case RISCV::VXOR_VX:
371 case RISCV::VSLL_VI:
372 case RISCV::VSLL_VV:
373 case RISCV::VSLL_VX:
374 case RISCV::VSRL_VI:
375 case RISCV::VSRL_VV:
376 case RISCV::VSRL_VX:
377 case RISCV::VSRA_VI:
378 case RISCV::VSRA_VV:
379 case RISCV::VSRA_VX:
380 // Vector Integer Min/Max Instructions
381 // EEW=SEW.
382 case RISCV::VMINU_VV:
383 case RISCV::VMINU_VX:
384 case RISCV::VMIN_VV:
385 case RISCV::VMIN_VX:
386 case RISCV::VMAXU_VV:
387 case RISCV::VMAXU_VX:
388 case RISCV::VMAX_VV:
389 case RISCV::VMAX_VX:
390 // Vector Single-Width Integer Multiply Instructions
391 // Source and Dest EEW=SEW.
392 case RISCV::VMUL_VV:
393 case RISCV::VMUL_VX:
394 case RISCV::VMULH_VV:
395 case RISCV::VMULH_VX:
396 case RISCV::VMULHU_VV:
397 case RISCV::VMULHU_VX:
398 case RISCV::VMULHSU_VV:
399 case RISCV::VMULHSU_VX:
400 // Vector Integer Divide Instructions
401 // EEW=SEW.
402 case RISCV::VDIVU_VV:
403 case RISCV::VDIVU_VX:
404 case RISCV::VDIV_VV:
405 case RISCV::VDIV_VX:
406 case RISCV::VREMU_VV:
407 case RISCV::VREMU_VX:
408 case RISCV::VREM_VV:
409 case RISCV::VREM_VX:
410 // Vector Single-Width Integer Multiply-Add Instructions
411 // EEW=SEW.
412 case RISCV::VMACC_VV:
413 case RISCV::VMACC_VX:
414 case RISCV::VNMSAC_VV:
415 case RISCV::VNMSAC_VX:
416 case RISCV::VMADD_VV:
417 case RISCV::VMADD_VX:
418 case RISCV::VNMSUB_VV:
419 case RISCV::VNMSUB_VX:
420 // Vector Integer Merge Instructions
421 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
422 // EEW=SEW, except the mask operand has EEW=1. Mask operand is handled
423 // before this switch.
424 case RISCV::VMERGE_VIM:
425 case RISCV::VMERGE_VVM:
426 case RISCV::VMERGE_VXM:
427 case RISCV::VADC_VIM:
428 case RISCV::VADC_VVM:
429 case RISCV::VADC_VXM:
430 case RISCV::VSBC_VVM:
431 case RISCV::VSBC_VXM:
432 // Vector Integer Move Instructions
433 // Vector Fixed-Point Arithmetic Instructions
434 // Vector Single-Width Saturating Add and Subtract
435 // Vector Single-Width Averaging Add and Subtract
436 // EEW=SEW.
437 case RISCV::VMV_V_I:
438 case RISCV::VMV_V_V:
439 case RISCV::VMV_V_X:
440 case RISCV::VSADDU_VI:
441 case RISCV::VSADDU_VV:
442 case RISCV::VSADDU_VX:
443 case RISCV::VSADD_VI:
444 case RISCV::VSADD_VV:
445 case RISCV::VSADD_VX:
446 case RISCV::VSSUBU_VV:
447 case RISCV::VSSUBU_VX:
448 case RISCV::VSSUB_VV:
449 case RISCV::VSSUB_VX:
450 case RISCV::VAADDU_VV:
451 case RISCV::VAADDU_VX:
452 case RISCV::VAADD_VV:
453 case RISCV::VAADD_VX:
454 case RISCV::VASUBU_VV:
455 case RISCV::VASUBU_VX:
456 case RISCV::VASUB_VV:
457 case RISCV::VASUB_VX:
458 // Vector Single-Width Fractional Multiply with Rounding and Saturation
459 // EEW=SEW. The instruction produces 2*SEW product internally but
460 // saturates to fit into SEW bits.
461 case RISCV::VSMUL_VV:
462 case RISCV::VSMUL_VX:
463 // Vector Single-Width Scaling Shift Instructions
464 // EEW=SEW.
465 case RISCV::VSSRL_VI:
466 case RISCV::VSSRL_VV:
467 case RISCV::VSSRL_VX:
468 case RISCV::VSSRA_VI:
469 case RISCV::VSSRA_VV:
470 case RISCV::VSSRA_VX:
471 // Vector Permutation Instructions
472 // Integer Scalar Move Instructions
473 // Floating-Point Scalar Move Instructions
474 // EEW=SEW.
475 case RISCV::VMV_X_S:
476 case RISCV::VMV_S_X:
477 case RISCV::VFMV_F_S:
478 case RISCV::VFMV_S_F:
479 // Vector Slide Instructions
480 // EEW=SEW.
481 case RISCV::VSLIDEUP_VI:
482 case RISCV::VSLIDEUP_VX:
483 case RISCV::VSLIDEDOWN_VI:
484 case RISCV::VSLIDEDOWN_VX:
485 case RISCV::VSLIDE1UP_VX:
486 case RISCV::VFSLIDE1UP_VF:
487 case RISCV::VSLIDE1DOWN_VX:
488 case RISCV::VFSLIDE1DOWN_VF:
489 // Vector Register Gather Instructions
490 // EEW=SEW. For mask operand, EEW=1.
491 case RISCV::VRGATHER_VI:
492 case RISCV::VRGATHER_VV:
493 case RISCV::VRGATHER_VX:
494 // Vector Element Index Instruction
495 case RISCV::VID_V:
496 // Vector Single-Width Floating-Point Add/Subtract Instructions
497 case RISCV::VFADD_VF:
498 case RISCV::VFADD_VV:
499 case RISCV::VFSUB_VF:
500 case RISCV::VFSUB_VV:
501 case RISCV::VFRSUB_VF:
502 // Vector Single-Width Floating-Point Multiply/Divide Instructions
503 case RISCV::VFMUL_VF:
504 case RISCV::VFMUL_VV:
505 case RISCV::VFDIV_VF:
506 case RISCV::VFDIV_VV:
507 case RISCV::VFRDIV_VF:
508 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
509 case RISCV::VFMACC_VV:
510 case RISCV::VFMACC_VF:
511 case RISCV::VFNMACC_VV:
512 case RISCV::VFNMACC_VF:
513 case RISCV::VFMSAC_VV:
514 case RISCV::VFMSAC_VF:
515 case RISCV::VFNMSAC_VV:
516 case RISCV::VFNMSAC_VF:
517 case RISCV::VFMADD_VV:
518 case RISCV::VFMADD_VF:
519 case RISCV::VFNMADD_VV:
520 case RISCV::VFNMADD_VF:
521 case RISCV::VFMSUB_VV:
522 case RISCV::VFMSUB_VF:
523 case RISCV::VFNMSUB_VV:
524 case RISCV::VFNMSUB_VF:
525 // Vector Floating-Point Square-Root Instruction
526 case RISCV::VFSQRT_V:
527 // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
528 case RISCV::VFRSQRT7_V:
529 // Vector Floating-Point Reciprocal Estimate Instruction
530 case RISCV::VFREC7_V:
531 // Vector Floating-Point MIN/MAX Instructions
532 case RISCV::VFMIN_VF:
533 case RISCV::VFMIN_VV:
534 case RISCV::VFMAX_VF:
535 case RISCV::VFMAX_VV:
536 // Vector Floating-Point Sign-Injection Instructions
537 case RISCV::VFSGNJ_VF:
538 case RISCV::VFSGNJ_VV:
539 case RISCV::VFSGNJN_VV:
540 case RISCV::VFSGNJN_VF:
541 case RISCV::VFSGNJX_VF:
542 case RISCV::VFSGNJX_VV:
543 // Vector Floating-Point Classify Instruction
544 case RISCV::VFCLASS_V:
545 // Vector Floating-Point Move Instruction
546 case RISCV::VFMV_V_F:
547 // Single-Width Floating-Point/Integer Type-Convert Instructions
548 case RISCV::VFCVT_XU_F_V:
549 case RISCV::VFCVT_X_F_V:
550 case RISCV::VFCVT_RTZ_XU_F_V:
551 case RISCV::VFCVT_RTZ_X_F_V:
552 case RISCV::VFCVT_F_XU_V:
553 case RISCV::VFCVT_F_X_V:
554 // Vector Floating-Point Merge Instruction
555 case RISCV::VFMERGE_VFM:
556 // Vector count population in mask vcpop.m
557 // vfirst find-first-set mask bit
558 case RISCV::VCPOP_M:
559 case RISCV::VFIRST_M:
560 // Vector Bit-manipulation Instructions (Zvbb)
561 // Vector And-Not
562 case RISCV::VANDN_VV:
563 case RISCV::VANDN_VX:
564 // Vector Reverse Bits in Elements
565 case RISCV::VBREV_V:
566 // Vector Reverse Bits in Bytes
567 case RISCV::VBREV8_V:
568 // Vector Reverse Bytes
569 case RISCV::VREV8_V:
570 // Vector Count Leading Zeros
571 case RISCV::VCLZ_V:
572 // Vector Count Trailing Zeros
573 case RISCV::VCTZ_V:
574 // Vector Population Count
575 case RISCV::VCPOP_V:
576 // Vector Rotate Left
577 case RISCV::VROL_VV:
578 case RISCV::VROL_VX:
579 // Vector Rotate Right
580 case RISCV::VROR_VI:
581 case RISCV::VROR_VV:
582 case RISCV::VROR_VX:
583 // Vector Carry-less Multiplication Instructions (Zvbc)
584 // Vector Carry-less Multiply
585 case RISCV::VCLMUL_VV:
586 case RISCV::VCLMUL_VX:
587 // Vector Carry-less Multiply Return High Half
588 case RISCV::VCLMULH_VV:
589 case RISCV::VCLMULH_VX:
590
591 // Zvabd
592 case RISCV::VABS_V:
593 case RISCV::VABD_VV:
594 case RISCV::VABDU_VV:
595
596 // XRivosVizip
597 case RISCV::RI_VZIPEVEN_VV:
598 case RISCV::RI_VZIPODD_VV:
599 case RISCV::RI_VZIP2A_VV:
600 case RISCV::RI_VZIP2B_VV:
601 case RISCV::RI_VUNZIP2A_VV:
602 case RISCV::RI_VUNZIP2B_VV:
603 return MILog2SEW;
604
605 // Vector Widening Shift Left Logical (Zvbb)
606 case RISCV::VWSLL_VI:
607 case RISCV::VWSLL_VX:
608 case RISCV::VWSLL_VV:
609 // Vector Widening Integer Add/Subtract
610 // Def uses EEW=2*SEW . Operands use EEW=SEW.
611 case RISCV::VWADDU_VV:
612 case RISCV::VWADDU_VX:
613 case RISCV::VWSUBU_VV:
614 case RISCV::VWSUBU_VX:
615 case RISCV::VWADD_VV:
616 case RISCV::VWADD_VX:
617 case RISCV::VWSUB_VV:
618 case RISCV::VWSUB_VX:
619 // Vector Widening Integer Multiply Instructions
620 // Destination EEW=2*SEW. Source EEW=SEW.
621 case RISCV::VWMUL_VV:
622 case RISCV::VWMUL_VX:
623 case RISCV::VWMULSU_VV:
624 case RISCV::VWMULSU_VX:
625 case RISCV::VWMULU_VV:
626 case RISCV::VWMULU_VX:
627 // Vector Widening Integer Multiply-Add Instructions
628 // Destination EEW=2*SEW. Source EEW=SEW.
629 // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
630 // is then added to the 2*SEW-bit Dest. These instructions never have a
631 // passthru operand.
632 case RISCV::VWMACCU_VV:
633 case RISCV::VWMACCU_VX:
634 case RISCV::VWMACC_VV:
635 case RISCV::VWMACC_VX:
636 case RISCV::VWMACCSU_VV:
637 case RISCV::VWMACCSU_VX:
638 case RISCV::VWMACCUS_VX:
639 // Vector Widening Floating-Point Fused Multiply-Add Instructions
640 case RISCV::VFWMACC_VF:
641 case RISCV::VFWMACC_VV:
642 case RISCV::VFWNMACC_VF:
643 case RISCV::VFWNMACC_VV:
644 case RISCV::VFWMSAC_VF:
645 case RISCV::VFWMSAC_VV:
646 case RISCV::VFWNMSAC_VF:
647 case RISCV::VFWNMSAC_VV:
648 case RISCV::VFWMACCBF16_VV:
649 case RISCV::VFWMACCBF16_VF:
650 // Vector Widening Floating-Point Add/Subtract Instructions
651 // Dest EEW=2*SEW. Source EEW=SEW.
652 case RISCV::VFWADD_VV:
653 case RISCV::VFWADD_VF:
654 case RISCV::VFWSUB_VV:
655 case RISCV::VFWSUB_VF:
656 // Vector Widening Floating-Point Multiply
657 case RISCV::VFWMUL_VF:
658 case RISCV::VFWMUL_VV:
659 // Widening Floating-Point/Integer Type-Convert Instructions
660 case RISCV::VFWCVT_XU_F_V:
661 case RISCV::VFWCVT_X_F_V:
662 case RISCV::VFWCVT_RTZ_XU_F_V:
663 case RISCV::VFWCVT_RTZ_X_F_V:
664 case RISCV::VFWCVT_F_XU_V:
665 case RISCV::VFWCVT_F_X_V:
666 case RISCV::VFWCVT_F_F_V:
667 case RISCV::VFWCVTBF16_F_F_V:
668 // Zvabd
669 case RISCV::VWABDA_VV:
670 case RISCV::VWABDAU_VV:
671 return IsMODef ? MILog2SEW + 1 : MILog2SEW;
672
673 // Def and Op1 uses EEW=2*SEW. Op2 uses EEW=SEW.
674 case RISCV::VWADDU_WV:
675 case RISCV::VWADDU_WX:
676 case RISCV::VWSUBU_WV:
677 case RISCV::VWSUBU_WX:
678 case RISCV::VWADD_WV:
679 case RISCV::VWADD_WX:
680 case RISCV::VWSUB_WV:
681 case RISCV::VWSUB_WX:
682 // Vector Widening Floating-Point Add/Subtract Instructions
683 case RISCV::VFWADD_WF:
684 case RISCV::VFWADD_WV:
685 case RISCV::VFWSUB_WF:
686 case RISCV::VFWSUB_WV: {
687 bool IsOp1 = (HasPassthru && !IsTied) ? MO.getOperandNo() == 2
688 : MO.getOperandNo() == 1;
689 bool TwoTimes = IsMODef || IsOp1;
690 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
691 }
692
693 // Vector Integer Extension
694 case RISCV::VZEXT_VF2:
695 case RISCV::VSEXT_VF2:
696 return getIntegerExtensionOperandEEW(2, MI, MO);
697 case RISCV::VZEXT_VF4:
698 case RISCV::VSEXT_VF4:
699 return getIntegerExtensionOperandEEW(4, MI, MO);
700 case RISCV::VZEXT_VF8:
701 case RISCV::VSEXT_VF8:
702 return getIntegerExtensionOperandEEW(8, MI, MO);
703
704 // Vector Narrowing Integer Right Shift Instructions
705 // Destination EEW=SEW, Op 1 has EEW=2*SEW. Op2 has EEW=SEW
706 case RISCV::VNSRL_WX:
707 case RISCV::VNSRL_WI:
708 case RISCV::VNSRL_WV:
709 case RISCV::VNSRA_WI:
710 case RISCV::VNSRA_WV:
711 case RISCV::VNSRA_WX:
712 // Vector Narrowing Fixed-Point Clip Instructions
713 // Destination and Op1 EEW=SEW. Op2 EEW=2*SEW.
714 case RISCV::VNCLIPU_WI:
715 case RISCV::VNCLIPU_WV:
716 case RISCV::VNCLIPU_WX:
717 case RISCV::VNCLIP_WI:
718 case RISCV::VNCLIP_WV:
719 case RISCV::VNCLIP_WX:
720 // Narrowing Floating-Point/Integer Type-Convert Instructions
721 case RISCV::VFNCVT_XU_F_W:
722 case RISCV::VFNCVT_X_F_W:
723 case RISCV::VFNCVT_RTZ_XU_F_W:
724 case RISCV::VFNCVT_RTZ_X_F_W:
725 case RISCV::VFNCVT_F_XU_W:
726 case RISCV::VFNCVT_F_X_W:
727 case RISCV::VFNCVT_F_F_W:
728 case RISCV::VFNCVT_ROD_F_F_W:
729 case RISCV::VFNCVTBF16_F_F_W: {
730 assert(!IsTied);
731 bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
732 bool TwoTimes = IsOp1;
733 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
734 }
735
736 // Vector Mask Instructions
737 // Vector Mask-Register Logical Instructions
738 // vmsbf.m set-before-first mask bit
739 // vmsif.m set-including-first mask bit
740 // vmsof.m set-only-first mask bit
741 // EEW=1
742 // We handle the cases when operand is a v0 mask operand above the switch,
743 // but these instructions may use non-v0 mask operands and need to be handled
744 // specifically.
745 case RISCV::VMAND_MM:
746 case RISCV::VMNAND_MM:
747 case RISCV::VMANDN_MM:
748 case RISCV::VMXOR_MM:
749 case RISCV::VMOR_MM:
750 case RISCV::VMNOR_MM:
751 case RISCV::VMORN_MM:
752 case RISCV::VMXNOR_MM:
753 case RISCV::VMSBF_M:
754 case RISCV::VMSIF_M:
755 case RISCV::VMSOF_M: {
756 return MILog2SEW;
757 }
758
759 // Vector Compress Instruction
760 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
761 // before this switch.
762 case RISCV::VCOMPRESS_VM:
763 return MO.getOperandNo() == 3 ? 0 : MILog2SEW;
764
765 // Vector Iota Instruction
766 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
767 // before this switch.
768 case RISCV::VIOTA_M: {
769 if (IsMODef || MO.getOperandNo() == 1)
770 return MILog2SEW;
771 return 0;
772 }
773
774 // Vector Integer Compare Instructions
775 // Dest EEW=1. Source EEW=SEW.
776 case RISCV::VMSEQ_VI:
777 case RISCV::VMSEQ_VV:
778 case RISCV::VMSEQ_VX:
779 case RISCV::VMSNE_VI:
780 case RISCV::VMSNE_VV:
781 case RISCV::VMSNE_VX:
782 case RISCV::VMSLTU_VV:
783 case RISCV::VMSLTU_VX:
784 case RISCV::VMSLT_VV:
785 case RISCV::VMSLT_VX:
786 case RISCV::VMSLEU_VV:
787 case RISCV::VMSLEU_VI:
788 case RISCV::VMSLEU_VX:
789 case RISCV::VMSLE_VV:
790 case RISCV::VMSLE_VI:
791 case RISCV::VMSLE_VX:
792 case RISCV::VMSGTU_VI:
793 case RISCV::VMSGTU_VX:
794 case RISCV::VMSGT_VI:
795 case RISCV::VMSGT_VX:
796 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
797 // Dest EEW=1. Source EEW=SEW. Mask source operand handled above this switch.
798 case RISCV::VMADC_VIM:
799 case RISCV::VMADC_VVM:
800 case RISCV::VMADC_VXM:
801 case RISCV::VMSBC_VVM:
802 case RISCV::VMSBC_VXM:
803 // Dest EEW=1. Source EEW=SEW.
804 case RISCV::VMADC_VV:
805 case RISCV::VMADC_VI:
806 case RISCV::VMADC_VX:
807 case RISCV::VMSBC_VV:
808 case RISCV::VMSBC_VX:
809 // 13.13. Vector Floating-Point Compare Instructions
810 // Dest EEW=1. Source EEW=SEW
811 case RISCV::VMFEQ_VF:
812 case RISCV::VMFEQ_VV:
813 case RISCV::VMFNE_VF:
814 case RISCV::VMFNE_VV:
815 case RISCV::VMFLT_VF:
816 case RISCV::VMFLT_VV:
817 case RISCV::VMFLE_VF:
818 case RISCV::VMFLE_VV:
819 case RISCV::VMFGT_VF:
820 case RISCV::VMFGE_VF: {
821 if (IsMODef)
822 return 0;
823 return MILog2SEW;
824 }
825
826 // Vector Reduction Operations
827 // Vector Single-Width Integer Reduction Instructions
828 case RISCV::VREDAND_VS:
829 case RISCV::VREDMAX_VS:
830 case RISCV::VREDMAXU_VS:
831 case RISCV::VREDMIN_VS:
832 case RISCV::VREDMINU_VS:
833 case RISCV::VREDOR_VS:
834 case RISCV::VREDSUM_VS:
835 case RISCV::VREDXOR_VS:
836 // Vector Single-Width Floating-Point Reduction Instructions
837 case RISCV::VFREDMAX_VS:
838 case RISCV::VFREDMIN_VS:
839 case RISCV::VFREDOSUM_VS:
840 case RISCV::VFREDUSUM_VS: {
841 return MILog2SEW;
842 }
843
844 // Vector Widening Integer Reduction Instructions
845 // The Dest and VS1 read only element 0 for the vector register. Return
846 // 2*EEW for these. VS2 has EEW=SEW and EMUL=LMUL.
847 case RISCV::VWREDSUM_VS:
848 case RISCV::VWREDSUMU_VS:
849 // Vector Widening Floating-Point Reduction Instructions
850 case RISCV::VFWREDOSUM_VS:
851 case RISCV::VFWREDUSUM_VS: {
852 bool TwoTimes = IsMODef || MO.getOperandNo() == 3;
853 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
854 }
855
856 // Vector Register Gather with 16-bit Index Elements Instruction
857 // Dest and source data EEW=SEW. Index vector EEW=16.
858 case RISCV::VRGATHEREI16_VV: {
859 if (MO.getOperandNo() == 2)
860 return 4;
861 return MILog2SEW;
862 }
863
864 default:
865 return std::nullopt;
866 }
867}
868
869static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
870 const MachineInstr &MI = *MO.getParent();
872 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
873 assert(RVV && "Could not find MI in PseudoTable");
874
875 std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO);
876 if (!Log2EEW)
877 return std::nullopt;
878
879 switch (RVV->BaseInstr) {
880 // Vector Reduction Operations
881 // Vector Single-Width Integer Reduction Instructions
882 // Vector Widening Integer Reduction Instructions
883 // Vector Widening Floating-Point Reduction Instructions
884 // The Dest and VS1 only read element 0 of the vector register. Return just
885 // the EEW for these.
886 case RISCV::VREDAND_VS:
887 case RISCV::VREDMAX_VS:
888 case RISCV::VREDMAXU_VS:
889 case RISCV::VREDMIN_VS:
890 case RISCV::VREDMINU_VS:
891 case RISCV::VREDOR_VS:
892 case RISCV::VREDSUM_VS:
893 case RISCV::VREDXOR_VS:
894 case RISCV::VWREDSUM_VS:
895 case RISCV::VWREDSUMU_VS:
896 case RISCV::VFWREDOSUM_VS:
897 case RISCV::VFWREDUSUM_VS:
898 if (MO.getOperandNo() != 2)
899 return OperandInfo(*Log2EEW);
900 break;
901 };
902
903 // All others have EMUL=EEW/SEW*LMUL
904 return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW);
905}
906
907static bool isTupleInsertInstr(const MachineInstr &MI);
908
909/// Return true if we can reason about demanded VLs elementwise for \p MI.
910bool RISCVVLOptimizer::isSupportedInstr(const MachineInstr &MI) const {
911 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
912 return true;
913
914 unsigned RVVOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
915 if (!RVVOpc)
916 return false;
917
918 assert(!(MI.getNumExplicitDefs() == 0 && !MI.mayStore() &&
919 !RISCVII::elementsDependOnVL(TII->get(RVVOpc).TSFlags)) &&
920 "No defs but elements don't depend on VL?");
921
922 // TODO: Reduce vl for vmv.s.x and vfmv.s.f. Currently this introduces more vl
923 // toggles, we need to extend PRE in RISCVInsertVSETVLI first.
924 if (RVVOpc == RISCV::VMV_S_X || RVVOpc == RISCV::VFMV_S_F)
925 return false;
926
927 if (RISCVII::elementsDependOnVL(TII->get(RVVOpc).TSFlags))
928 return false;
929
930 if (MI.mayStore())
931 return false;
932
933 return true;
934}
935
936/// Return true if MO is a vector operand but is used as a scalar operand.
938 const MachineInstr *MI = MO.getParent();
940 RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
941
942 if (!RVV)
943 return false;
944
945 switch (RVV->BaseInstr) {
946 // Reductions only use vs1[0] of vs1
947 case RISCV::VREDAND_VS:
948 case RISCV::VREDMAX_VS:
949 case RISCV::VREDMAXU_VS:
950 case RISCV::VREDMIN_VS:
951 case RISCV::VREDMINU_VS:
952 case RISCV::VREDOR_VS:
953 case RISCV::VREDSUM_VS:
954 case RISCV::VREDXOR_VS:
955 case RISCV::VWREDSUM_VS:
956 case RISCV::VWREDSUMU_VS:
957 case RISCV::VFREDMAX_VS:
958 case RISCV::VFREDMIN_VS:
959 case RISCV::VFREDOSUM_VS:
960 case RISCV::VFREDUSUM_VS:
961 case RISCV::VFWREDOSUM_VS:
962 case RISCV::VFWREDUSUM_VS:
963 return MO.getOperandNo() == 3;
964 case RISCV::VMV_X_S:
965 case RISCV::VFMV_F_S:
966 return MO.getOperandNo() == 1;
967 default:
968 return false;
969 }
970}
971
972bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
973 const MCInstrDesc &Desc = MI.getDesc();
974 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
975 return false;
976
977 if (MI.getNumExplicitDefs() != 1)
978 return false;
979
980 // Some instructions have implicit defs e.g. $vxsat. If they might be read
981 // later then we can't reduce VL.
982 if (!MI.allImplicitDefsAreDead()) {
983 LLVM_DEBUG(dbgs() << "Not a candidate because has non-dead implicit def\n");
984 return false;
985 }
986
987 if (MI.mayRaiseFPException()) {
988 LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
989 return false;
990 }
991
992 for (const MachineMemOperand *MMO : MI.memoperands()) {
993 if (MMO->isVolatile()) {
994 LLVM_DEBUG(dbgs() << "Not a candidate because contains volatile MMO\n");
995 return false;
996 }
997 }
998
999 if (!isSupportedInstr(MI)) {
1000 LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: "
1001 << MI);
1002 return false;
1003 }
1004
1006 TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) &&
1007 "Instruction shouldn't be supported if elements depend on VL");
1008
1010 MRI->getRegClass(MI.getOperand(0).getReg())->TSFlags) &&
1011 "All supported instructions produce a vector register result");
1012
1013 LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n");
1014 return true;
1015}
1016
1017/// Given a vslidedown.vx like:
1018///
1019/// %slideamt = ADDI %x, -1
1020/// %v = PseudoVSLIDEDOWN_VX %passthru, %src, %slideamt, avl=1
1021///
1022/// %v will only read the first %slideamt + 1 lanes of %src, which = %x.
1023/// This is a common case when lowering extractelement.
1024///
1025/// Note that if %x is 0, %slideamt will be all ones. In this case %src will be
1026/// completely slid down and none of its lanes will be read (since %slideamt is
1027/// greater than the largest VLMAX of 65536) so we can demand any minimum VL.
1028static std::optional<DemandedVL>
1030 const MachineRegisterInfo *MRI) {
1031 const MachineInstr &MI = *UserOp.getParent();
1032 if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VSLIDEDOWN_VX)
1033 return std::nullopt;
1034 // We're looking at what lanes are used from the src operand.
1035 if (UserOp.getOperandNo() != 2)
1036 return std::nullopt;
1037 // For now, the AVL must be 1.
1038 const MachineOperand &AVL = MI.getOperand(4);
1039 if (!AVL.isImm() || AVL.getImm() != 1)
1040 return std::nullopt;
1041 // The slide amount must be %x - 1.
1042 const MachineOperand &SlideAmt = MI.getOperand(3);
1043 if (!SlideAmt.getReg().isVirtual())
1044 return std::nullopt;
1045 MachineInstr *SlideAmtDef = MRI->getUniqueVRegDef(SlideAmt.getReg());
1046 if (SlideAmtDef->getOpcode() != RISCV::ADDI ||
1047 SlideAmtDef->getOperand(2).getImm() != -AVL.getImm() ||
1048 !SlideAmtDef->getOperand(1).getReg().isVirtual())
1049 return std::nullopt;
1050 return SlideAmtDef->getOperand(1);
1051}
1052
1053DemandedVL
1054RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
1055 const MachineInstr &UserMI = *UserOp.getParent();
1056 const MCInstrDesc &Desc = UserMI.getDesc();
1057
1058 if (UserMI.isPHI() || UserMI.isFullCopy() || isTupleInsertInstr(UserMI))
1059 return DemandedVLs.lookup(&UserMI);
1060
1061 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1062 LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
1063 " use VLMAX\n");
1064 return DemandedVL::vlmax();
1065 }
1066
1067 if (auto VL = getMinimumVLForVSLIDEDOWN_VX(UserOp, MRI))
1068 return *VL;
1069
1071 TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
1072 LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
1073 return DemandedVL::vlmax();
1074 }
1075
1076 unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1077 const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1078 // Looking for an immediate or a register VL that isn't X0.
1079 assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1080 "Did not expect X0 VL");
1081
1082 // If the user is a passthru it will read the elements past VL, so
1083 // abort if any of the elements past VL are demanded.
1084 if (UserOp.isTied()) {
1085 assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
1087 if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) {
1088 LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
1089 "instruction with demanded tail\n");
1090 return DemandedVL::vlmax();
1091 }
1092 }
1093
1094 // Instructions like reductions may use a vector register as a scalar
1095 // register. In this case, we should treat it as only reading the first lane.
1096 if (isVectorOpUsedAsScalarOp(UserOp)) {
1097 LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
1098 return MachineOperand::CreateImm(1);
1099 }
1100
1101 // If we know the demanded VL of UserMI, then we can reduce the VL it
1102 // requires.
1103 if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp))
1104 return DemandedVLs.lookup(&UserMI);
1105
1106 return VLOp;
1107}
1108
1109/// Return true if MI is an instruction used for assembling registers
1110/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
1111/// Currently it's lowered to INSERT_SUBREG.
1113 if (!MI.isInsertSubreg())
1114 return false;
1115
1116 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1117 const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
1119 if (!RISCVRI::isVRegClass(DstRC->TSFlags))
1120 return false;
1121 unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
1122 if (NF < 2)
1123 return false;
1124
1125 // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
1126 auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
1127 unsigned SubRegIdx = MI.getOperand(3).getImm();
1128 [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
1129 assert(!IsFractional && "unexpected LMUL for tuple register classes");
1130 return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
1131}
1132
1134 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
1135 case VSSEG_CASES(8):
1136 case VSSSEG_CASES(8):
1137 case VSUXSEG_CASES(8):
1138 case VSOXSEG_CASES(8):
1139 case VSSEG_CASES(16):
1140 case VSSSEG_CASES(16):
1141 case VSUXSEG_CASES(16):
1142 case VSOXSEG_CASES(16):
1143 case VSSEG_CASES(32):
1144 case VSSSEG_CASES(32):
1145 case VSUXSEG_CASES(32):
1146 case VSOXSEG_CASES(32):
1147 case VSSEG_CASES(64):
1148 case VSSSEG_CASES(64):
1149 case VSUXSEG_CASES(64):
1150 case VSOXSEG_CASES(64):
1151 return true;
1152 default:
1153 return false;
1154 }
1155}
1156
1157bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
1158 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
1159 return true;
1160
1161 SmallSetVector<MachineOperand *, 8> OpWorklist;
1162 SmallPtrSet<const MachineInstr *, 4> PHISeen;
1163 for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
1164 OpWorklist.insert(&UserOp);
1165
1166 while (!OpWorklist.empty()) {
1167 MachineOperand &UserOp = *OpWorklist.pop_back_val();
1168 const MachineInstr &UserMI = *UserOp.getParent();
1169 LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
1170
1171 if (UserMI.isFullCopy() && UserMI.getOperand(0).getReg().isVirtual()) {
1172 LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
1174 MRI->use_operands(UserMI.getOperand(0).getReg())));
1175 continue;
1176 }
1177
1178 if (isTupleInsertInstr(UserMI)) {
1179 LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
1180 for (MachineOperand &UseOp :
1181 MRI->use_operands(UserMI.getOperand(0).getReg())) {
1182 const MachineInstr &CandidateMI = *UseOp.getParent();
1183 // We should not propagate the VL if the user is not a segmented store
1184 // or another INSERT_SUBREG, since VL just works differently
1185 // between segmented operations (per-field) v.s. other RVV ops (on the
1186 // whole register group).
1187 if (!isTupleInsertInstr(CandidateMI) &&
1188 !isSegmentedStoreInstr(CandidateMI))
1189 return false;
1190 OpWorklist.insert(&UseOp);
1191 }
1192 continue;
1193 }
1194
1195 if (UserMI.isPHI()) {
1196 // Don't follow PHI cycles
1197 if (!PHISeen.insert(&UserMI).second)
1198 continue;
1199 LLVM_DEBUG(dbgs() << " Peeking through uses of PHI\n");
1201 MRI->use_operands(UserMI.getOperand(0).getReg())));
1202 continue;
1203 }
1204
1205 if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
1206 LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
1207 return false;
1208 }
1209
1210 std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp);
1211 std::optional<OperandInfo> ProducerInfo = getOperandInfo(MI.getOperand(0));
1212 if (!ConsumerInfo || !ProducerInfo) {
1213 LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
1214 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1215 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1216 return false;
1217 }
1218
1219 if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
1220 LLVM_DEBUG(
1221 dbgs()
1222 << " Abort due to incompatible information for EMUL or EEW.\n");
1223 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1224 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1225 return false;
1226 }
1227 }
1228
1229 return true;
1230}
1231
1232bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI,
1233 MachineOperand CommonVL) const {
1234 LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
1235
1236 unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
1237 MachineOperand &VLOp = MI.getOperand(VLOpNum);
1238
1239 assert((CommonVL.isImm() || CommonVL.getReg().isVirtual()) &&
1240 "Expected VL to be an Imm or virtual Reg");
1241
1242 // If the VL is defined by a vleff that doesn't dominate MI, try using the
1243 // vleff's AVL. It will be greater than or equal to the output VL.
1244 if (CommonVL.isReg()) {
1245 const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
1246 if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
1247 !MDT->dominates(VLMI, &MI))
1248 CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
1249 }
1250
1251 if (!RISCV::isVLKnownLE(CommonVL, VLOp)) {
1252 LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
1253 return false;
1254 }
1255
1256 if (CommonVL.isIdenticalTo(VLOp)) {
1257 LLVM_DEBUG(
1258 dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
1259 return false;
1260 }
1261
1262 if (CommonVL.isImm()) {
1263 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1264 << CommonVL.getImm() << " for " << MI << "\n");
1265 VLOp.ChangeToImmediate(CommonVL.getImm());
1266 return true;
1267 }
1268 MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
1269 auto VLDominates = [this, &VLMI](const MachineInstr &MI) {
1270 return MDT->dominates(VLMI, &MI);
1271 };
1272 if (!VLDominates(MI)) {
1273 assert(MI.getNumExplicitDefs() == 1);
1274 auto Uses = MRI->use_instructions(MI.getOperand(0).getReg());
1275 auto UsesSameBB = make_filter_range(Uses, [&MI](const MachineInstr &Use) {
1276 return Use.getParent() == MI.getParent();
1277 });
1278 if (VLMI->getParent() == MI.getParent() &&
1279 all_of(UsesSameBB, VLDominates) &&
1281 MI.moveBefore(VLMI->getNextNode());
1282 } else {
1283 LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
1284 return false;
1285 }
1286 }
1287 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1288 << printReg(CommonVL.getReg(), MRI->getTargetRegisterInfo())
1289 << " for " << MI << "\n");
1290
1291 // All our checks passed. We can reduce VL.
1292 VLOp.ChangeToRegister(CommonVL.getReg(), false);
1293 MRI->constrainRegClass(CommonVL.getReg(), &RISCV::GPRNoX0RegClass);
1294 return true;
1295}
1296
1297static bool isPhysical(const MachineOperand &MO) {
1298 return MO.isReg() && MO.getReg().isPhysical();
1299}
1300
1301/// Look through \p MI's operands and propagate what it demands to its uses.
1302void RISCVVLOptimizer::transfer(const MachineInstr &MI) {
1303 if (!isSupportedInstr(MI) || !checkUsers(MI) || any_of(MI.defs(), isPhysical))
1304 DemandedVLs[&MI] = DemandedVL::vlmax();
1305
1306 for (const MachineOperand &MO : virtual_vec_uses(MI)) {
1307 const MachineInstr *Def = MRI->getVRegDef(MO.getReg());
1308 DemandedVL Prev = DemandedVLs[Def];
1309 DemandedVLs[Def] = DemandedVLs[Def].max(getMinimumVLForUser(MO));
1310 if (DemandedVLs[Def] != Prev)
1311 Worklist.insert(Def);
1312 }
1313}
1314
1315bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
1316 if (skipFunction(MF.getFunction()))
1317 return false;
1318
1319 MRI = &MF.getRegInfo();
1320 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1321
1322 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1323 if (!ST.hasVInstructions())
1324 return false;
1325
1326 TII = ST.getInstrInfo();
1327
1328 assert(DemandedVLs.empty());
1329
1330 // For each instruction that defines a vector, propagate the VL it
1331 // uses to its inputs.
1332 for (MachineBasicBlock *MBB : post_order(&MF)) {
1334 for (MachineInstr &MI : reverse(*MBB))
1335 if (!MI.isDebugInstr())
1336 Worklist.insert(&MI);
1337 }
1338
1339 while (!Worklist.empty()) {
1340 const MachineInstr *MI = Worklist.front();
1341 Worklist.remove(MI);
1342 transfer(*MI);
1343 }
1344
1345 // Then go through and see if we can reduce the VL of any instructions to
1346 // only what's demanded.
1347 bool MadeChange = false;
1348 for (auto &[MI, VL] : DemandedVLs) {
1349 assert(MDT->isReachableFromEntry(MI->getParent()));
1350 if (!isCandidate(*MI))
1351 continue;
1352 if (!tryReduceVL(*const_cast<MachineInstr *>(MI), VL.VL))
1353 continue;
1354 MadeChange = true;
1355 }
1356
1357 DemandedVLs.clear();
1358 return MadeChange;
1359}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static std::optional< DemandedVL > getMinimumVLForVSLIDEDOWN_VX(const MachineOperand &UserOp, const MachineRegisterInfo *MRI)
Given a vslidedown.vx like:
static unsigned getIntegerExtensionOperandEEW(unsigned Factor, const MachineInstr &MI, const MachineOperand &MO)
Dest has EEW=SEW.
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO)
#define VSOXSEG_CASES(EEW)
static bool isSegmentedStoreInstr(const MachineInstr &MI)
static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO)
Return true if MO is a vector operand but is used as a scalar operand.
static std::optional< unsigned > getOperandLog2EEW(const MachineOperand &MO)
static std::pair< unsigned, bool > getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI)
Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and SEW are from the TSFlags o...
#define VSUXSEG_CASES(EEW)
static bool isPhysical(const MachineOperand &MO)
#define VSSSEG_CASES(EEW)
#define VSSEG_CASES(EEW)
static bool isTupleInsertInstr(const MachineInstr &MI)
Return true if MI is an instruction used for assembling registers for segmented store instructions,...
Remove Loads Into Fake Uses
This file implements a set that has insertion order iteration characteristics.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
bool isReachableFromEntry(const NodeT *A) const
isReachableFromEntry - Return true if A is dominated by the entry block of the function containing it...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Describe properties that are true of each instruction in the target description file.
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To)
Return true if moving From down to To won't cause any physical register reads or writes to be clobber...
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A vector that has set insertion semantics.
Definition SetVector.h:57
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
const uint8_t TSFlags
Configurable target specific flags.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
static bool readsPastVL(uint64_t TSFlags)
static bool isTiedPseudo(uint64_t TSFlags)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool elementsDependOnVL(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
static unsigned getNF(uint8_t TSFlags)
static bool isVRegClass(uint8_t TSFlags)
static RISCVVType::VLMUL getLMul(uint8_t TSFlags)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS)
Given two VL operands, do we know that LHS <= RHS?
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2128
FunctionPass * createRISCVVLOptimizerPass()
iterator_range< po_iterator< T > > post_order(const T &G)
Op::Description Desc
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:552
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.